ZaynZhu committed
Commit 7c08dc3 · 0 Parent(s)

Clean version without large assets

This view is limited to 50 files because it contains too many changes.

Files changed (50)
  1. .gitattributes +40 -0
  2. .gitignore +18 -0
  3. Paper2Poster/.gitignore +17 -0
  4. Paper2Poster/LICENSE +21 -0
  5. Paper2Poster/Paper2Poster-eval/create_paper_questions.py +40 -0
  6. Paper2Poster/Paper2Poster-eval/eval_poster_pipeline.py +479 -0
  7. Paper2Poster/Paper2Poster-eval/eval_qa_fix.py +114 -0
  8. Paper2Poster/PosterAgent/LLM_direct_generate.py +103 -0
  9. Paper2Poster/PosterAgent/LLM_direct_generate_beamer.py +189 -0
  10. Paper2Poster/PosterAgent/__init__.py +16 -0
  11. Paper2Poster/PosterAgent/apply_theme.py +281 -0
  12. Paper2Poster/PosterAgent/beamer_pipeline.py +182 -0
  13. Paper2Poster/PosterAgent/create_dataset.py +69 -0
  14. Paper2Poster/PosterAgent/deoverflow.py +234 -0
  15. Paper2Poster/PosterAgent/deoverflow_parallel.py +485 -0
  16. Paper2Poster/PosterAgent/fill_and_style.py +215 -0
  17. Paper2Poster/PosterAgent/gen_beamer_code.py +299 -0
  18. Paper2Poster/PosterAgent/gen_outline_layout.py +851 -0
  19. Paper2Poster/PosterAgent/gen_outline_layout_parallel.py +949 -0
  20. Paper2Poster/PosterAgent/gen_poster_content.py +529 -0
  21. Paper2Poster/PosterAgent/gen_pptx_code.py +249 -0
  22. Paper2Poster/PosterAgent/new_pipeline.py +547 -0
  23. Paper2Poster/PosterAgent/parse_raw.py +237 -0
  24. Paper2Poster/PosterAgent/poster_gen_pipeline.py +101 -0
  25. Paper2Poster/PosterAgent/tree_split_layout.py +750 -0
  26. Paper2Poster/README.md +215 -0
  27. Paper2Poster/__init__.py +3 -0
  28. Paper2Poster/camel/__init__.py +25 -0
  29. Paper2Poster/camel/agents/__init__.py +44 -0
  30. Paper2Poster/camel/agents/base.py +29 -0
  31. Paper2Poster/camel/agents/chat_agent.py +1539 -0
  32. Paper2Poster/camel/agents/critic_agent.py +202 -0
  33. Paper2Poster/camel/agents/deductive_reasoner_agent.py +303 -0
  34. Paper2Poster/camel/agents/embodied_agent.py +201 -0
  35. Paper2Poster/camel/agents/knowledge_graph_agent.py +259 -0
  36. Paper2Poster/camel/agents/multi_hop_generator_agent.py +117 -0
  37. Paper2Poster/camel/agents/programmed_agent_instruction.py +203 -0
  38. Paper2Poster/camel/agents/role_assignment_agent.py +141 -0
  39. Paper2Poster/camel/agents/search_agent.py +133 -0
  40. Paper2Poster/camel/agents/task_agent.py +410 -0
  41. Paper2Poster/camel/agents/tool_agents/__init__.py +20 -0
  42. Paper2Poster/camel/agents/tool_agents/base.py +39 -0
  43. Paper2Poster/camel/agents/tool_agents/hugging_face_tool_agent.py +206 -0
  44. Paper2Poster/camel/benchmarks/__init__.py +30 -0
  45. Paper2Poster/camel/benchmarks/apibank.py +565 -0
  46. Paper2Poster/camel/benchmarks/apibench.py +500 -0
  47. Paper2Poster/camel/benchmarks/base.py +152 -0
  48. Paper2Poster/camel/benchmarks/gaia.py +478 -0
  49. Paper2Poster/camel/benchmarks/nexus.py +518 -0
  50. Paper2Poster/camel/benchmarks/ragbench.py +333 -0
.gitattributes ADDED
@@ -0,0 +1,40 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.pdf filter=lfs diff=lfs merge=lfs -text
+ *.wav filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,18 @@
+ input/
+ output/Paper2Poster/assets/
+ Paper2Video/assets/
+ posterbuilder/latex_proj/figures/
+ *.png
+ *.pdf
+ *.jpg
+ *.wav
+ *.mp4
+ __pycache__/
+ *.png
+ *.jpg
+ *.pdf
+ *.wav
+ *.mp4
+ Paper2Poster/assets/
+ Paper2Video/assets/
+ posterbuilder/latex_proj/figures/
Paper2Poster/.gitignore ADDED
@@ -0,0 +1,17 @@
+ .env
+ .vscode/
+ ablations/
+ **/__pycache__/
+ *_generated_posters/
+ *_images_and_tables/
+ contents/
+ tmp/
+ tree_splits/
+ eval_results/
+ Paper2Poster-data/
+ Example/
+ *.ipynb
+ eval_time_detail_parallel/
+ *.sh
+ .claude
+ CLAUDE.md
Paper2Poster/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Paper2Poster
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
Paper2Poster/Paper2Poster-eval/create_paper_questions.py ADDED
@@ -0,0 +1,40 @@
+ from utils.poster_eval_utils import *
+ import argparse
+ import os
+ import json
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--paper_folder', type=str, default=None)
+     parser.add_argument('--model_name', type=str, default='o3')
+     args = parser.parse_args()
+
+     paper_text = get_poster_text(os.path.join(args.paper_folder, 'paper.pdf'))
+
+     if args.model_name == '4o':
+         model_type = ModelType.GPT_4O
+     elif args.model_name == 'o3':
+         model_type = ModelType.O3
+     detail_qa = get_questions(paper_text, 'detail', model_type)
+     understanding_qa = get_questions(paper_text, 'understanding', model_type)
+
+     detail_q, detail_a, detail_aspects = get_answers_and_remove_answers(detail_qa)
+     understanding_q, understanding_a, understanding_aspects = get_answers_and_remove_answers(understanding_qa)
+
+     final_qa = {}
+     detail_qa = {
+         'questions': detail_q,
+         'answers': detail_a,
+         'aspects': detail_aspects,
+     }
+
+     understanding_qa = {
+         'questions': understanding_q,
+         'answers': understanding_a,
+         'aspects': understanding_aspects,
+     }
+     final_qa['detail'] = detail_qa
+     final_qa['understanding'] = understanding_qa
+
+     with open(os.path.join(args.paper_folder, f'{args.model_name}_qa.json'), 'w') as f:
+         json.dump(final_qa, f, indent=4)
Paper2Poster/Paper2Poster-eval/eval_poster_pipeline.py ADDED
@@ -0,0 +1,479 @@
1
+ from utils.poster_eval_utils import *
2
+ import json
3
+ from utils.wei_utils import get_agent_config
4
+ import argparse
5
+ from dotenv import load_dotenv
6
+ import tempfile
7
+ import shutil
8
+ import os
9
+ import glob
10
+ import re
11
+
12
+ load_dotenv()
13
+
14
+ def run_qa_and_update_results(
15
+ args,
16
+ raw_folder,
17
+ gen_poster_path,
18
+ save_path,
19
+ single_model_name=None,
20
+ del_model_name=None,
21
+ ):
22
+ """
23
+ If single_model_name is provided, run QA for that one model only,
24
+ but update an existing JSON file (which already contains the other
25
+ models' results) and re-compute the overall averages.
26
+
27
+ If single_model_name is None, run QA for all models in all_model_names
28
+ and write a new JSON file.
29
+
30
+ :param raw_folder: Path to folder with 'o3_qa.json'.
31
+ :param gen_poster_path: Path to the generated poster image.
32
+ :param save_path: Directory where overall_qa_result.json is saved or should be written.
33
+ :param all_model_names: List of model names (e.g. ['vllm_qwen_vl', '4o', 'o3']).
34
+ :param single_model_name: Optional single model name.
35
+ """
36
+
37
+ # Load the QA data (questions, answers, aspects)
38
+ qa_dict = json.load(open(os.path.join(raw_folder, 'o3_qa.json'), 'r'))
39
+ detail_qa = qa_dict['detail']
40
+ understanding_qa = qa_dict['understanding']
41
+
42
+ # Option A: Single model case
43
+ if single_model_name is not None:
44
+ qa_input_token, qa_output_token = 0, 0
45
+ # Load the existing JSON with all previously computed results
46
+ existing_path = os.path.join(save_path, "overall_qa_result.json")
47
+ with open(existing_path, 'r') as f:
48
+ overall_qa_result = json.load(f)
49
+
50
+ if del_model_name is not None:
51
+ # Remove the specified model from the existing results
52
+ if del_model_name in overall_qa_result['qa_result']:
53
+ del overall_qa_result['qa_result'][del_model_name]
54
+ print(f"Removed model {del_model_name} from existing results.")
55
+
56
+ if single_model_name in overall_qa_result['qa_result']:
57
+ print(f"Model {single_model_name} already evaluated. Skipping.")
58
+ return
59
+
60
+ # Evaluate QA for the single_model_name
61
+ print(f"Running QA for single model: {single_model_name}")
62
+ agent_config = get_agent_config(single_model_name)
63
+
64
+ if args.poster_method == 'paper':
65
+ poster_images = open_folder_images(gen_folder, args.paper_name.replace(' ', '_'), format='jpg')
66
+ else:
67
+ poster_images = [Image.open(gen_poster_path)]
68
+
69
+ poster_images = [ensure_under_limit_pil(image) for image in poster_images]
70
+
71
+ detail_accuracy, detail_aspect_accuracy, detail_agent_answers, input_token, output_token = eval_qa_get_answer(
72
+ poster_input=poster_images,
73
+ questions=detail_qa['questions'],
74
+ answers=detail_qa['answers'],
75
+ aspects=detail_qa['aspects'],
76
+ input_type='image',
77
+ agent_config=agent_config
78
+ )
79
+ qa_input_token += input_token
80
+ qa_output_token += output_token
81
+ print('Detail QA accuracy:', detail_accuracy)
82
+
83
+ understanding_accuracy, understanding_aspect_accuracy, understanding_agent_answers, input_token, output_token = eval_qa_get_answer(
84
+ poster_input=poster_images,
85
+ questions=understanding_qa['questions'],
86
+ answers=understanding_qa['answers'],
87
+ aspects=understanding_qa['aspects'],
88
+ input_type='image',
89
+ agent_config=agent_config
90
+ )
91
+ qa_input_token += input_token
92
+ qa_output_token += output_token
93
+ print('Understanding QA accuracy:', understanding_accuracy)
94
+
95
+ # Update QA result for this one model
96
+ # overall_qa_result["qa_result"] is assumed to already have the others
97
+ overall_qa_result['qa_result'][single_model_name] = {
98
+ 'detail_accuracy': detail_accuracy,
99
+ 'detail_aspect_accuracy': detail_aspect_accuracy,
100
+ 'detail_agent_answers': detail_agent_answers,
101
+ 'understanding_accuracy': understanding_accuracy,
102
+ 'understanding_aspect_accuracy': understanding_aspect_accuracy,
103
+ 'understanding_agent_answers': understanding_agent_answers
104
+ }
105
+
106
+ # Now re-compute the averages across all models present in the JSON
107
+ # Grab all model entries from overall_qa_result['qa_result']
108
+ all_models_in_file = list(overall_qa_result['qa_result'].keys())
109
+ detail_accs = []
110
+ understanding_accs = []
111
+ for m in all_models_in_file:
112
+ detail_accs.append(overall_qa_result['qa_result'][m]['detail_accuracy'])
113
+ understanding_accs.append(overall_qa_result['qa_result'][m]['understanding_accuracy'])
114
+
115
+ avg_detail_accuracy = float(np.mean(detail_accs)) if detail_accs else 0.0
116
+ avg_understanding_accuracy = float(np.mean(understanding_accs)) if understanding_accs else 0.0
117
+
118
+ overall_qa_result['avg_detail_accuracy'] = avg_detail_accuracy
119
+ overall_qa_result['avg_understanding_accuracy'] = avg_understanding_accuracy
120
+
121
+ # Finally, overwrite the same JSON file with the updated results
122
+ with open(existing_path, 'w') as f:
123
+ json.dump(overall_qa_result, f, indent=4)
124
+
125
+ print(f'Input tokens: {qa_input_token}')
126
+ print(f'Output tokens: {qa_output_token}')
127
+
128
+ print('Updated overall_qa_result.json with single-model results.')
129
+ print('New average detail accuracy:', avg_detail_accuracy)
130
+ print('New average understanding accuracy:', avg_understanding_accuracy)
131
+
132
+ if __name__ == '__main__':
133
+ parser = argparse.ArgumentParser()
134
+ parser.add_argument('--paper_name', type=str)
135
+ parser.add_argument('--base_dir', type=str, default='Paper2Poster-data')
136
+ parser.add_argument('--poster_method', type=str)
137
+ parser.add_argument('--poster_image_name', type=str, default='poster.png', choices=['poster.png'])
138
+ parser.add_argument('--metric', type=str, choices=['stats', 'qa', 'judge', 'word_count', 'token_count', 'figure_count', 'aesthetic_judge'], default='stats')
139
+ parser.add_argument('--fix', type=str, default=None)
140
+ parser.add_argument('--del_model_name', type=str, default=None)
141
+
142
+ args = parser.parse_args()
143
+
144
+ raw_poster_path = f'{args.base_dir}/{args.paper_name}/poster.png'
145
+ raw_folder = f'{args.base_dir}/{args.paper_name}'
146
+
147
+ gen_poster_path = f'{args.poster_method}/{args.base_dir}/{args.paper_name}/{args.poster_image_name}'
148
+ gen_folder = f'{args.poster_method}/{args.base_dir}/{args.paper_name}'
149
+
150
+ save_path = f'eval_results/{args.paper_name}/{args.poster_method}'
151
+ os.makedirs(save_path, exist_ok=True)
152
+
153
+ if args.poster_method == 'paper':
154
+ if args.metric == 'qa' and args.fix is not None:
155
+ overall_qa_result = json.load(open(f'{save_path}/overall_qa_result.json', 'r'))
156
+ if args.fix in overall_qa_result['qa_result']:
157
+ print(f"Model {args.fix} already evaluated. Skipping.")
158
+ exit(0)
159
+ # create a temp folder to store the paper
160
+ # 1) Create a unique temp folder
161
+ temp_dir = tempfile.mkdtemp(prefix="eval_temp", suffix="_data")
162
+
163
+ # 2) Build your source directory path, replacing spaces
164
+ paper_slug = args.paper_name.replace(' ', '_')
165
+ source_dir = os.path.join('<4o_vllm_qwen>_images_and_tables', paper_slug)
166
+
167
+ # 3) Sequentially copy files named "<paper_slug>-<index>.png"
168
+ index = 1
169
+ while True:
170
+ filename = f"{paper_slug}-{index}.png"
171
+ src_path = os.path.join(source_dir, filename)
172
+ if not os.path.isfile(src_path):
173
+ # stop once the next index is missing
174
+ break
175
+ shutil.copy2(src_path, os.path.join(temp_dir, filename))
176
+ index += 1
177
+ if index > 20 and args.metric != 'word_count' and args.metric != 'token_count':
178
+ break
179
+
180
+ gen_folder = temp_dir
181
+ gen_poster_path = f'{args.base_dir}/{args.paper_name}/paper.pdf'
182
+
183
+
184
+ print('Evaluating poster:', args.paper_name)
185
+
186
+ if args.metric == 'stats':
187
+ stats_file = os.path.join(save_path, 'stats_result.json')
188
+
189
+ # 1) load existing results if there are any
190
+ if os.path.exists(stats_file):
191
+ with open(stats_file, 'r') as f:
192
+ stats_result = json.load(f)
193
+ print(f"Loaded existing stats from {stats_file}")
194
+ else:
195
+ stats_result = {}
196
+
197
+ # 2) CLIP similarity
198
+ if 'CLIP_similarity' not in stats_result:
199
+ _, cos_sim = compare_folders_with_clip(raw_folder, gen_folder)
200
+ stats_result['CLIP_similarity'] = cos_sim
201
+ print(f'CLIP similarity: {cos_sim}')
202
+ else:
203
+ print(f"Skipping CLIP similarity (already {stats_result['CLIP_similarity']})")
204
+
205
+ # 3) we only need to regenerate markdown+images if any of the text/image metrics is missing
206
+ need_eval = any(k not in stats_result for k in ('textual_ppl', 'mixtual_ppl', 'visual_relevance', 'visual_ppl'))
207
+ if need_eval:
208
+ images, poster_text, raw_markdown, new_markdown = gen_eval_markdown(
209
+ args.paper_name,
210
+ args.poster_method,
211
+ gen_poster_path
212
+ )
213
+
214
+ # textual PPL
215
+ if 'textual_ppl' not in stats_result:
216
+ textual_ppl = get_ppl(poster_text)
217
+ stats_result['textual_ppl'] = textual_ppl
218
+ print(f'Textual PPL: {textual_ppl}')
219
+ else:
220
+ print(f"Skipping textual PPL (already {stats_result['textual_ppl']})")
221
+
222
+ # mixtual PPL
223
+ if 'mixtual_ppl' not in stats_result:
224
+ mixtual_ppl = get_ppl(new_markdown)
225
+ stats_result['mixtual_ppl'] = mixtual_ppl
226
+ print(f'Mixtual PPL: {mixtual_ppl}')
227
+ else:
228
+ print(f"Skipping mixtual PPL (already {stats_result['mixtual_ppl']})")
229
+
230
+ # visual relevance
231
+ if 'visual_relevance' not in stats_result:
232
+ if images:
233
+ sims = [
234
+ compute_cosine_similarity(v['image_clip_embedding'],
235
+ v['section_text_clip_embedding'])
236
+ for v in images.values()
237
+ ]
238
+ avg_sim = float(np.mean(sims))
239
+ stats_result['visual_relevance'] = avg_sim
240
+ print(f'Average cosine similarity: {avg_sim}')
241
+ else:
242
+ stats_result['visual_relevance'] = 0.0
243
+ print('No images found in the poster. Set visual_relevance to 0.')
244
+ else:
245
+ print(f"Skipping visual relevance (already {stats_result['visual_relevance']})")
246
+
247
+ if 'visual_ppl' not in stats_result or math.isnan(stats_result['visual_ppl']):
248
+ visual_ppls = []
249
+ for relative_path, v in images.items():
250
+ image_path = os.path.join('eval_poster_markdown', args.paper_name, args.poster_method, relative_path)
251
+ image = Image.open(image_path)
252
+ visual_ppl = get_visual_ppl(image, poster_text)
253
+ visual_ppls.append(visual_ppl)
254
+ avg_visual_ppl = float(np.mean(visual_ppls))
255
+ stats_result['visual_ppl'] = avg_visual_ppl
256
+ print(f'Average visual PPL: {avg_visual_ppl}')
257
+ else:
258
+ print("All textual and visual metrics already computed; skipping gen_eval_markdown.")
259
+
260
+ if 'interleaved_ppl' not in stats_result:
261
+ interleaved_ppl = compute_interleaved_ppl(args.paper_name, args.poster_method)
262
+ stats_result['interleaved_ppl'] = interleaved_ppl
263
+ print(f'Interleaved PPL: {interleaved_ppl}')
264
+ else:
265
+ print(f"Skipping interleaved PPL (already {stats_result['interleaved_ppl']})")
266
+
267
+ if 'poster_image_ppl' not in stats_result:
268
+ if args.poster_method == 'paper':
269
+ poster_images = open_folder_images(gen_folder, args.paper_name.replace(' ', '_'), format='jpg')
270
+ else:
271
+ poster_images = [Image.open(gen_poster_path)]
272
+ poster_image_ppl = compute_poster_image_ppl(poster_images)
273
+ stats_result['poster_image_ppl'] = poster_image_ppl
274
+ print(f'Poster image PPL: {poster_image_ppl}')
275
+ else:
276
+ print(f"Skipping poster image PPL (already {stats_result['poster_image_ppl']})")
277
+
278
+ # 4) write back updated file
279
+ with open(stats_file, 'w') as f:
280
+ json.dump(stats_result, f, indent=4)
281
+ print(f"Updated stats written to {stats_file}")
282
+ elif args.metric == 'figure_count':
283
+ save_file_path = os.path.join(save_path, 'figure_count.json')
284
+ if os.path.exists(save_file_path):
285
+ print(f"Figure count already exists at {save_file_path}. Skipping.")
286
+ else:
287
+ figure_count = gen_eval_markdown(
288
+ args.paper_name,
289
+ args.poster_method,
290
+ gen_poster_path,
291
+ figure_count_only=True
292
+ )
293
+ with open(save_file_path, 'w') as f:
294
+ json.dump({'figure_count': figure_count}, f, indent=4)
295
+ print(f"Figure count saved to {save_file_path}")
296
+ elif args.metric == 'qa':
297
+ if args.fix is not None:
298
+ run_qa_and_update_results(
299
+ args,
300
+ raw_folder,
301
+ gen_poster_path,
302
+ save_path,
303
+ single_model_name=args.fix,
304
+ del_model_name=args.del_model_name
305
+ )
306
+ else:
307
+ overall_qa_result = {}
308
+ qa_result = {}
309
+ qa_dict = json.load(open(os.path.join(raw_folder, 'o3_qa.json'), 'r'))
310
+ detail_qa = qa_dict['detail']
311
+ understanding_qa = qa_dict['understanding']
312
+ model_names = [
313
+ '4o',
314
+ 'o3',
315
+ '4o-mini'
316
+ ]
317
+ if args.poster_method == 'paper':
318
+ poster_images = open_folder_images(gen_folder, args.paper_name.replace(' ', '_'))
319
+ else:
320
+ poster_images = [Image.open(gen_poster_path)]
321
+
322
+ poster_images = [ensure_under_limit_pil(image) for image in poster_images]
323
+
324
+ for model_name in model_names:
325
+ qa_input_token, qa_output_token = 0, 0
326
+ print('QA model:', model_name)
327
+ agent_config = get_agent_config(model_name)
328
+ detail_accuracy, detail_aspect_accuracy, detail_agent_answers, input_token, output_token = eval_qa_get_answer(
329
+ poster_input=poster_images,
330
+ questions=detail_qa['questions'],
331
+ answers=detail_qa['answers'],
332
+ aspects=detail_qa['aspects'],
333
+ input_type='image',
334
+ agent_config=agent_config
335
+ )
336
+ print(f'{model_name} Detail QA accuracy:', detail_accuracy)
337
+ qa_input_token += input_token
338
+ qa_output_token += output_token
339
+
340
+ understanding_accuracy, understanding_aspect_accuracy, understanding_agent_answers, input_token, output_token = eval_qa_get_answer(
341
+ poster_input=poster_images,
342
+ questions=understanding_qa['questions'],
343
+ answers=understanding_qa['answers'],
344
+ aspects=understanding_qa['aspects'],
345
+ input_type='image',
346
+ agent_config=agent_config
347
+ )
348
+ print(f'{model_name} Understanding QA accuracy:', understanding_accuracy)
349
+ qa_input_token += input_token
350
+ qa_output_token += output_token
351
+
352
+ qa_result[model_name] = {
353
+ 'detail_accuracy': detail_accuracy,
354
+ 'detail_aspect_accuracy': detail_aspect_accuracy,
355
+ 'detail_agent_answers': detail_agent_answers,
356
+ 'understanding_accuracy': understanding_accuracy,
357
+ 'understanding_aspect_accuracy': understanding_aspect_accuracy,
358
+ 'understanding_agent_answers': understanding_agent_answers
359
+ }
360
+
361
+ print(f'{model_name} Input tokens:', qa_input_token)
362
+ print(f'{model_name} Output tokens:', qa_output_token)
363
+
364
+ # average the results
365
+ avg_detail_accuracy = np.mean([qa_result[model_name]['detail_accuracy'] for model_name in model_names])
366
+ avg_understanding_accuracy = np.mean([qa_result[model_name]['understanding_accuracy'] for model_name in model_names])
367
+
368
+ print('Average detail accuracy:', avg_detail_accuracy)
369
+ print('Average understanding accuracy:', avg_understanding_accuracy)
370
+
371
+ overall_qa_result['avg_detail_accuracy'] = avg_detail_accuracy
372
+ overall_qa_result['avg_understanding_accuracy'] = avg_understanding_accuracy
373
+ overall_qa_result['qa_result'] = qa_result
374
+
375
+ with open(f'{save_path}/overall_qa_result.json', 'w') as f:
376
+ json.dump(overall_qa_result, f, indent=4)
377
+
378
+ elif args.metric == 'word_count':
379
+ if args.poster_method == 'paper':
380
+ # loop through all images in the folder
381
+ image_paths = open_folder_images(gen_folder, args.paper_name.replace(' ', '_'), return_path=True)
382
+ word_count = 0
383
+ for image_path in image_paths:
384
+ # count words in each image
385
+ word_count += count_words_in_image(image_path)
386
+ else:
387
+ word_count = count_words_in_image(gen_poster_path)
388
+ # save to json
389
+ with open(f'{save_path}/word_count.json', 'w') as f:
390
+ json.dump({'word_count': word_count}, f, indent=4)
391
+
392
+ elif args.metric == 'token_count':
393
+ if args.poster_method == 'paper':
394
+ # loop through all images in the folder
395
+ image_paths = open_folder_images(gen_folder, args.paper_name.replace(' ', '_'), return_path=True)
396
+ token_count = 0
397
+ for image_path in image_paths:
398
+ # count tokens in each image
399
+ token_count += count_tokens_in_image(image_path)
400
+ else:
401
+ token_count = count_tokens_in_image(gen_poster_path)
402
+ # save to json
403
+ with open(f'{save_path}/token_count.json', 'w') as f:
404
+ json.dump({'token_count': token_count}, f, indent=4)
405
+ elif args.metric == 'judge':
406
+ agent_config = get_agent_config('4o')
407
+
408
+ if args.poster_method == 'paper':
409
+ poster_images = open_folder_images(gen_folder, args.paper_name.replace(' ', '_'))
410
+ else:
411
+ poster_images = [Image.open(gen_poster_path)]
412
+
413
+ results = eval_vlm_as_judge(
414
+ poster_image_list=poster_images,
415
+ agent_config=agent_config,
416
+ )
417
+
418
+ aesthetic_aspects = [
419
+ 'aesthetic_element',
420
+ 'aesthetic_engagement',
421
+ 'aesthetic_layout'
422
+ ]
423
+
424
+ information_aspects = [
425
+ 'information_low_level',
426
+ 'information_logic',
427
+ 'information_content',
428
+ ]
429
+
430
+ # compute average scores for all, for aesthetic, and for information
431
+ overall_average = np.mean([results[aspect]['score'] for aspect in results])
432
+ aesthetic_average = np.mean([results[aspect]['score'] for aspect in results if aspect in aesthetic_aspects])
433
+ information_average = np.mean([results[aspect]['score'] for aspect in results if aspect in information_aspects])
434
+
435
+ judge_result = {
436
+ 'overall_average': overall_average,
437
+ 'aesthetic_average': aesthetic_average,
438
+ 'information_average': information_average,
439
+ 'results': results
440
+ }
441
+
442
+ # save to json
443
+ with open(f'{save_path}/judge_result.json', 'w') as f:
444
+ json.dump(judge_result, f, indent=4)
445
+ elif args.metric == 'aesthetic_judge':
446
+ agent_config = get_agent_config('4o')
447
+
448
+ if args.poster_method == 'paper':
449
+ poster_images = open_folder_images(gen_folder, args.paper_name.replace(' ', '_'))
450
+ else:
451
+ poster_images = [Image.open(gen_poster_path)]
452
+
453
+ results = eval_vlm_as_judge(
454
+ poster_image_list=poster_images,
455
+ agent_config=agent_config,
456
+ aspect='aesthetic'
457
+ )
458
+
459
+ aesthetic_aspects = [
460
+ 'aesthetic_element',
461
+ 'aesthetic_engagement',
462
+ 'aesthetic_layout'
463
+ ]
464
+
465
+ aesthetic_average = np.mean([results[aspect]['score'] for aspect in results if aspect in aesthetic_aspects])
466
+
467
+ judge_result = {
468
+ 'aesthetic_average': aesthetic_average,
469
+ 'results': results
470
+ }
471
+
472
+ # save to json
473
+ with open(f'{save_path}/aesthetic_judge_result.json', 'w') as f:
474
+ json.dump(judge_result, f, indent=4)
475
+
476
+ if args.poster_method == 'paper':
477
+ # remove the temp folder
478
+ shutil.rmtree(temp_dir)
479
+ print(f"Removed temporary folder {temp_dir}")
Paper2Poster/Paper2Poster-eval/eval_qa_fix.py ADDED
@@ -0,0 +1,114 @@
+ #!/usr/bin/env python3
+ """
+ Run eval_poster_pipeline.py for every sub-folder in poster_sum_100,
+ using up to 10 threads. poster_method and fix are now taken from
+ command-line arguments.
+
+ Example:
+     python run_eval_threads.py \
+         --poster_method poster_sum_50 \
+         --fix llama-3-70b-vl
+
+ """
+ import argparse
+ import concurrent.futures as cf
+ import pathlib
+ import signal
+ import subprocess
+ import sys
+
+ BASE_DIR = pathlib.Path("poster_sum_100")  # directory holding the papers
+
+ # ── Argument parsing ───────────────────────────────────────────────────────────
+ parser = argparse.ArgumentParser(
+     description="Run eval_poster_pipeline.py concurrently on all papers."
+ )
+ parser.add_argument(
+     "--poster_method",
+     default="poster_sum_100",
+     help="Name of the poster-generation method to evaluate (default: %(default)s)",
+ )
+ parser.add_argument(
+     "--fix",
+     default="qwen-2.5-vl-72b",
+     help="Value to pass to --fix in eval_poster_pipeline.py (default: %(default)s)",
+ )
+
+ parser.add_argument(
+     '--max_workers',
+     type=int,
+     default=1,
+ )
+
+ parser.add_argument('--del_model_name', type=str)
+ args = parser.parse_args()
+ # ───────────────────────────────────────────────────────────────────────────────
+
+
+ MAX_WORKERS = args.max_workers
+
+ def run_pipeline(subfolder: str, poster_method: str, fix: str) -> None:
+     """Invoke eval_poster_pipeline.py for a single paper."""
+     cmd = [
+         "python",
+         "eval_poster_pipeline.py",
+         "--paper_name",
+         subfolder,
+         "--poster_method",
+         poster_method,
+         "--poster_image_name",
+         "poster.png",
+         "--metric",
+         "qa",
+         "--fix",
+         fix,
+     ]
+     if args.del_model_name:
+         cmd += ["--del_model_name", args.del_model_name]
+     subprocess.run(cmd, check=True)
+
+
+ MAX_RETRIES = 50
+
+ def run_with_retries(folder: str, poster_method, fix) -> None:
+     """
+     Tries to run_pipeline up to MAX_RETRIES times before giving up.
+     """
+     for attempt in range(1, MAX_RETRIES + 1):
+         try:
+             run_pipeline(folder, poster_method, fix)
+             return
+         except Exception as e:
+             if attempt < MAX_RETRIES:
+                 print(f"⚠️ {folder}: attempt {attempt} failed ({e!r}), retrying…")
+             else:
+                 # Last attempt also failed, re-raise so the pool will catch it
+                 raise
+
+ def main() -> None:
+     folders = sorted(p.name for p in BASE_DIR.iterdir() if p.is_dir())
+
+     with cf.ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
+         futures = {
+             pool.submit(run_with_retries, f, args.poster_method, args.fix): f
+             for f in folders
+         }
+         for fut in cf.as_completed(futures):
+             paper = futures[fut]
+             try:
+                 fut.result()
+                 print(f"✓ {paper} done")
+             except Exception as e:
+                 print(f"✗ {paper} failed after {MAX_RETRIES} attempts: {e}", file=sys.stderr)
+
+ # ── Graceful shutdown on Ctrl-C / SIGTERM ──────────────────────────────────────
+ def _handle_signal(signum, frame):
+     print("\nReceived signal, shutting down…", file=sys.stderr)
+     sys.exit(1)
+
+
+ signal.signal(signal.SIGINT, _handle_signal)
+ signal.signal(signal.SIGTERM, _handle_signal)
+
+ # ── Entry point ────────────────────────────────────────────────────────────────
+ if __name__ == "__main__":
+     main()
Paper2Poster/PosterAgent/LLM_direct_generate.py ADDED
@@ -0,0 +1,103 @@
+ from dotenv import load_dotenv
+ from utils.src.utils import get_json_from_response
+
+ from camel.models import ModelFactory
+ from camel.agents import ChatAgent
+
+
+ from utils.wei_utils import account_token, get_agent_config, html_to_png
+
+ from utils.pptx_utils import *
+ from utils.critic_utils import *
+ import yaml
+ import time
+ from jinja2 import Environment, StrictUndefined
+ from utils.poster_eval_utils import get_poster_text
+ import argparse
+ import json
+ import os
+
+ load_dotenv()
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--paper_path', type=str)
+     parser.add_argument('--model_name', type=str, default='4o')
+
+     args = parser.parse_args()
+
+     # get current directory
+     current_dir = os.path.dirname(os.path.abspath(__file__))
+
+     meta_dir = args.paper_path.replace('paper.pdf', 'meta.json')
+     meta = json.load(open(meta_dir, 'r'))
+     poster_width = meta['width']
+     poster_height = meta['height']
+
+     output_dir = f"{args.model_name}_HTML/{args.paper_path.replace('paper.pdf', '')}"
+     os.makedirs(output_dir, exist_ok=True)
+
+     total_input_token = 0
+     total_output_token = 0
+
+     start_time = time.time()
+     model_config = get_agent_config(args.model_name)
+     model = ModelFactory.create(
+         model_platform=model_config['model_platform'],
+         model_type=model_config['model_type'],
+         model_config_dict=model_config['model_config'],
+     )
+     paper_text = get_poster_text(args.paper_path)
+
+     actor_agent_name = 'LLM_gen_HTML'
+
+     with open(f'prompt_templates/{actor_agent_name}.yaml', "r") as f:
+         content_config = yaml.safe_load(f)
+     jinja_env = Environment(undefined=StrictUndefined)
+     template = jinja_env.from_string(content_config["template"])
+
+     actor_sys_msg = content_config['system_prompt']
+     actor_agent = ChatAgent(
+         system_message=actor_sys_msg,
+         model=model,
+         message_window_size=None
+     )
+
+     jinja_args = {
+         'document_markdown': paper_text,
+         'poster_width': poster_width,
+         'poster_height': poster_height,
+     }
+     prompt = template.render(**jinja_args)
+
+     actor_agent.reset()
+     response = actor_agent.step(prompt)
+     input_token, output_token = account_token(response)
+     total_input_token += input_token
+     total_output_token += output_token
+     result_json = get_json_from_response(response.msgs[0].content)
+     html_str = result_json['HTML']
+
+     # write to poster.html
+     with open(f'{output_dir}/poster.html', 'w') as f:
+         f.write(html_str)
+
+     html_to_png(
+         os.path.join(current_dir, output_dir, 'poster.html'),
+         poster_width,
+         poster_height,
+         os.path.join(current_dir, output_dir, 'poster.png')
+     )
+
+
+     end_time = time.time()
+     elapsed_time = end_time - start_time
+
+     log = {
+         'input_token': total_input_token,
+         'output_token': total_output_token,
+         'time_taken': elapsed_time
+     }
+
+     with open(f'{output_dir}/log.json', 'w') as f:
+         json.dump(log, f, indent=4)
Paper2Poster/PosterAgent/LLM_direct_generate_beamer.py ADDED
@@ -0,0 +1,189 @@
1
+ import os
2
+ import json
3
+ import time
4
+ from dotenv import load_dotenv
5
+ from jinja2 import Environment, StrictUndefined
6
+
7
+ from utils.src.utils import get_json_from_response, account_token, html_to_png
8
+ from utils.config_utils import load_poster_yaml_config
9
+
10
+ from camel.models import ModelFactory
11
+ from camel.agents import ChatAgent
12
+ from camel.configs import ChatGPTConfig
13
+ from camel.types import ModelPlatformType, ModelType
14
+
15
+ load_dotenv()
16
+
17
+ def gen_beamer_poster_direct(
18
+ paper_text: str,
19
+ poster_width_cm: float = 120,
20
+ poster_height_cm: float = 90,
21
+ beamer_theme: str = "default",
22
+ output_dir: str = "output",
23
+ model_name: str = "4o"
24
+ ):
25
+ """
26
+ Generate Beamer poster directly from paper text using LLM.
27
+
28
+ Args:
29
+ paper_text: Extracted text from the paper
30
+ poster_width_cm: Poster width in centimeters
31
+ poster_height_cm: Poster height in centimeters
32
+ beamer_theme: Beamer theme name
33
+ output_dir: Output directory
34
+ model_name: Model name for generation
35
+ """
36
+ start_time = time.time()
37
+ total_input_token, total_output_token = 0, 0
38
+
39
+ # Load configuration
40
+ config_path = "utils/prompt_templates/LLM_gen_Beamer.yaml"
41
+ with open(config_path, "r") as f:
42
+ config = yaml.safe_load(f)
43
+
44
+ # Create model and agent
45
+ actor_model = ModelFactory.create(
46
+ model_platform=ModelPlatformType.OPENAI,
47
+ model_type=ModelType.GPT_4O,
48
+ model_config_dict=ChatGPTConfig().as_dict(),
49
+ )
50
+
51
+ actor_agent = ChatAgent(
52
+ system_message=config['system_prompt'],
53
+ model=actor_model,
54
+ message_window_size=None
55
+ )
56
+
57
+ # Prepare template arguments
58
+ jinja_args = {
59
+ 'document_markdown': paper_text,
60
+ 'poster_width_cm': poster_width_cm,
61
+ 'poster_height_cm': poster_height_cm,
62
+ 'beamer_theme': beamer_theme,
63
+ 'aspect_ratio': "169",
64
+ 'title_color': "[47, 85, 151]",
65
+ 'text_color': "[0, 0, 0]"
66
+ }
67
+
68
+ # Render template
69
+ jinja_env = Environment(undefined=StrictUndefined)
70
+ template = jinja_env.from_string(config["template"])
71
+ prompt = template.render(**jinja_args)
72
+
73
+ # Generate Beamer code
74
+ actor_agent.reset()
75
+ response = actor_agent.step(prompt)
76
+ input_token, output_token = account_token(response)
77
+ total_input_token += input_token
78
+ total_output_token += output_token
79
+
80
+ # Extract LaTeX code
81
+ result_json = get_json_from_response(response.msgs[0].content)
82
+ latex_str = result_json['LATEX']
83
+
84
+ # Save LaTeX file
85
+ os.makedirs(output_dir, exist_ok=True)
86
+ tex_path = os.path.join(output_dir, 'poster.tex')
87
+ with open(tex_path, 'w', encoding='utf-8') as f:
88
+ f.write(latex_str)
89
+
90
+ # Compile to PDF
91
+ print("Compiling LaTeX to PDF...")
92
+ success = compile_beamer_to_pdf(tex_path, output_dir)
93
+
94
+ if success:
95
+ print(f"✅ Beamer poster generated successfully: {tex_path}")
96
+ else:
97
+ print("❌ Failed to compile LaTeX to PDF")
98
+
99
+ # Save log
100
+ end_time = time.time()
101
+ elapsed_time = end_time - start_time
102
+
103
+ log = {
104
+ 'input_token': total_input_token,
105
+ 'output_token': total_output_token,
106
+ 'time_taken': elapsed_time,
107
+ 'output_format': 'beamer',
108
+ 'beamer_theme': beamer_theme
109
+ }
110
+
111
+ with open(os.path.join(output_dir, 'log.json'), 'w') as f:
112
+ json.dump(log, f, indent=4)
113
+
114
+ return tex_path, success
115
+
116
+ def compile_beamer_to_pdf(tex_path: str, output_dir: str = "."):
117
+ """
118
+ Compile Beamer .tex file to PDF using pdflatex.
119
+
120
+ Args:
121
+ tex_path: Path to .tex file
122
+ output_dir: Output directory for PDF
123
+ """
124
+ import subprocess
125
+
126
+ try:
127
+ # Run pdflatex twice for proper cross-references
128
+ result1 = subprocess.run(
129
+ ['pdflatex', '-output-directory', output_dir, tex_path],
130
+ capture_output=True,
131
+ text=True,
132
+ timeout=60
133
+ )
134
+
135
+ result2 = subprocess.run(
136
+ ['pdflatex', '-output-directory', output_dir, tex_path],
137
+ capture_output=True,
138
+ text=True,
139
+ timeout=60
140
+ )
141
+
142
+ if result1.returncode == 0 and result2.returncode == 0:
143
+ print(f"Successfully compiled {tex_path} to PDF")
144
+ return True
145
+ else:
146
+ print(f"Error compiling {tex_path}:")
147
+ print(result1.stderr)
148
+ print(result2.stderr)
149
+ return False
150
+
151
+ except subprocess.TimeoutExpired:
152
+ print(f"Timeout while compiling {tex_path}")
153
+ return False
154
+ except Exception as e:
155
+ print(f"Error compiling {tex_path}: {e}")
156
+ return False
157
+
158
+ if __name__ == "__main__":
159
+ import argparse
160
+
161
+ parser = argparse.ArgumentParser(description='Generate Beamer poster directly from paper')
162
+ parser.add_argument('--paper_path', required=True, help='Path to paper PDF')
163
+ parser.add_argument('--output_dir', default='beamer_output', help='Output directory')
164
+ parser.add_argument('--poster_width_cm', type=float, default=120, help='Poster width in cm')
165
+ parser.add_argument('--poster_height_cm', type=float, default=90, help='Poster height in cm')
166
+ parser.add_argument('--beamer_theme', default='default', help='Beamer theme')
167
+ parser.add_argument('--model_name', default='4o', help='Model name')
168
+
169
+ args = parser.parse_args()
170
+
171
+ # Extract text from paper (you'll need to implement this)
172
+ # For now, using placeholder text
173
+ paper_text = "This is placeholder text. In practice, you would extract text from the PDF."
174
+
175
+ # Generate Beamer poster
176
+ tex_path, success = gen_beamer_poster_direct(
177
+ paper_text=paper_text,
178
+ poster_width_cm=args.poster_width_cm,
179
+ poster_height_cm=args.poster_height_cm,
180
+ beamer_theme=args.beamer_theme,
181
+ output_dir=args.output_dir,
182
+ model_name=args.model_name
183
+ )
184
+
185
+ if success:
186
+ print(f"Beamer poster generated at: {tex_path}")
187
+ else:
188
+ print("Failed to generate Beamer poster")
189
Paper2Poster/PosterAgent/__init__.py ADDED
@@ -0,0 +1,16 @@
+ from . import (
+     apply_theme,
+     create_dataset,
+     deoverflow,
+     deoverflow_parallel,
+     fill_and_style,
+     gen_outline_layout_parallel,
+     gen_outline_layout,
+     gen_poster_content,
+     gen_pptx_code,
+     LLM_direct_generate,
+     new_pipeline,
+     parse_raw,
+     poster_gen_pipeline,
+     tree_split_layout
+ )
Paper2Poster/PosterAgent/apply_theme.py ADDED
@@ -0,0 +1,281 @@
1
+ from dotenv import load_dotenv
2
+ from utils.src.utils import ppt_to_images, get_json_from_response
3
+ import json
4
+ import shutil
5
+
6
+ from camel.models import ModelFactory
7
+ from camel.agents import ChatAgent
8
+
9
+ from utils.wei_utils import *
10
+
11
+ from camel.messages import BaseMessage
12
+ from PIL import Image
13
+ import pickle as pkl
14
+ from utils.pptx_utils import *
15
+ from utils.critic_utils import *
16
+ import yaml
17
+ from jinja2 import Environment, StrictUndefined
18
+ from pdf2image import convert_from_path
19
+ import argparse
20
+
21
+ load_dotenv()
22
+
23
+ def poster_apply_theme(args, actor_config, critic_config):
24
+ total_input_token, total_output_token = 0, 0
25
+ extract_input_token, extract_output_token = 0, 0
26
+ gen_input_token, gen_output_token = 0, 0
27
+ non_overlap_ckpt = pkl.load(open(f'checkpoints/{args.model_name}_{args.poster_name}_non_overlap_ckpt_{args.index}.pkl', 'rb'))
28
+ non_overlap_code = non_overlap_ckpt['final_code_by_section']
29
+ sections = list(non_overlap_code.keys())
30
+ sections = [s for s in sections if s != 'meta']
31
+ template_img = convert_from_path(args.template_path)[0]
32
+ image_bytes = io.BytesIO()
33
+ template_img.save(image_bytes, format="PNG")
34
+ image_bytes.seek(0)
35
+
36
+ # Reload the image from memory as a standard PIL.Image.Image
37
+ template_img = Image.open(image_bytes)
38
+
39
+
40
+ title_actor_agent_name = 'theme_agent_title'
41
+ with open(f"prompt_templates/{title_actor_agent_name}.yaml", "r") as f:
42
+ title_theme_actor_config = yaml.safe_load(f)
43
+
44
+ section_actor_agent_name = 'theme_agent_section'
45
+ with open(f"prompt_templates/{section_actor_agent_name}.yaml", "r") as f:
46
+ section_theme_actor_config = yaml.safe_load(f)
47
+
48
+ title_actor_model = ModelFactory.create(
49
+ model_platform=actor_config['model_platform'],
50
+ model_type=actor_config['model_type'],
51
+ model_config_dict=actor_config['model_config'], # [Optional] the config for model
52
+ )
53
+
54
+ title_actor_sys_msg = title_theme_actor_config['system_prompt']
55
+
56
+ title_actor_agent = ChatAgent(
57
+ system_message=title_actor_sys_msg,
58
+ model=title_actor_model,
59
+ message_window_size=10, # [Optional] the length for chat memory
60
+ )
61
+
62
+ section_actor_model = ModelFactory.create(
63
+ model_platform=actor_config['model_platform'],
64
+ model_type=actor_config['model_type'],
65
+ model_config_dict=actor_config['model_config'], # [Optional] the config for model
66
+ )
67
+
68
+ section_actor_sys_msg = section_theme_actor_config['system_prompt']
69
+
70
+ section_actor_agent = ChatAgent(
71
+ system_message=section_actor_sys_msg,
72
+ model=section_actor_model,
73
+ message_window_size=10, # [Optional] the length for chat memory
74
+ )
75
+
76
+ critic_model = ModelFactory.create(
77
+ model_platform=critic_config['model_platform'],
78
+ model_type=critic_config['model_type'],
79
+ model_config_dict=critic_config['model_config'],
80
+ )
81
+
82
+ critic_sys_msg = 'You are a helpful assistant.'
83
+
84
+ critic_agent = ChatAgent(
85
+ system_message=critic_sys_msg,
86
+ model=critic_model,
87
+ message_window_size=None,
88
+ )
89
+
90
+ theme_aspects = {
91
+ 'background': ['background'],
92
+ 'title': ['title_author', 'title_author_border'],
93
+ 'section': ['section_body', 'section_title', 'section_border']
94
+ }
95
+
96
+ theme_styles = {}
97
+ for aspect in theme_aspects.keys():
98
+ theme_styles[aspect] = {}
99
+
100
+ for aspect, prompt_types in theme_aspects.items():
101
+ for prompt_type in prompt_types:
102
+ print(f'Getting style for {prompt_type}')
103
+ with open(f"prompt_templates/theme_templates/theme_{prompt_type}.txt", "r") as f:
104
+ prompt = f.read()
105
+ msg = BaseMessage.make_user_message(
106
+ role_name="User",
107
+ content=prompt,
108
+ image_list=[template_img],
109
+ )
110
+
111
+ critic_agent.reset()
112
+ response = critic_agent.step(msg)
113
+ input_token, output_token = account_token(response)
114
+ total_input_token += input_token
115
+ total_output_token += output_token
116
+ extract_input_token += input_token
117
+ extract_output_token += output_token
118
+ theme_style = get_json_from_response(response.msgs[0].content)
119
+ theme_styles[aspect][prompt_type] = theme_style
120
+
121
+ if 'fontStyle' in theme_styles['section']['section_body']:
122
+ del theme_styles['section']['section_body']['fontStyle']
123
+
124
+ outline_path = f'outlines/{args.model_name}_{args.poster_name}_outline_{args.index}.json'
125
+ outline = json.load(open(outline_path, 'r'))
126
+ outline_skeleton = {}
127
+ for key, val in outline.items():
128
+ if key == 'meta':
129
+ continue
130
+ if not 'subsections' in val:
131
+ outline_skeleton[key] = {
132
+ 'section': key
133
+ }
134
+ else:
135
+ for subsection_name, subsection_dict in val['subsections'].items():
136
+ outline_skeleton[subsection_dict['name']] = {
137
+ 'section': key
138
+ }
139
+
140
+ for key in outline_skeleton.keys():
141
+ if 'title' in key.lower() or 'author' in key.lower():
142
+ outline_skeleton[key]['style'] = theme_styles['section']['section_title']
143
+ else:
144
+ outline_skeleton[key]['style'] = theme_styles['section']['section_body']
145
+
146
+ outline_skeleton_list = []
147
+ for section in sections[1:]:
148
+ # append all subsections whose section key is the current section
149
+ for key, val in outline_skeleton.items():
150
+ if val['section'] == section:
151
+ outline_skeleton_list.append({key: val})
152
+
153
+ theme_logs = {}
154
+ theme_code = {}
155
+ concatenated_code = {}
156
+
157
+ # Title
158
+ jinja_env = Environment(undefined=StrictUndefined)
159
+
160
+ title_actor_template = jinja_env.from_string(title_theme_actor_config["template"])
161
+
162
+ # Title section
163
+ print(f'Processing section {sections[0]}')
164
+ curr_title_code = non_overlap_code[sections[0]]
165
+ for style in ['background', 'title']:
166
+ for sub_style in theme_styles[style].keys():
167
+ print(f' Applying theme for {sub_style}')
168
+ jinja_args = {
169
+ 'style_json': {sub_style: theme_styles[style][sub_style]},
170
+ 'function_docs': documentation,
171
+ 'existing_code': curr_title_code
172
+ }
173
+ actor_prompt = title_actor_template.render(**jinja_args)
174
+ log = apply_theme(title_actor_agent, actor_prompt, args.max_retry, existing_code='')
175
+ if log[-1]['error'] is not None:
176
+ raise Exception(log[-1]['error'])
177
+
178
+ input_token, output_token = log[-1]['cumulative_tokens']
179
+ total_input_token += input_token
180
+ total_output_token += output_token
181
+ gen_input_token += input_token
182
+ gen_output_token += output_token
183
+
184
+ shutil.copy('poster.pptx', f'tmp/theme_poster_<{sections[0]}>_<{style}>_<{sub_style}>.pptx')
185
+
186
+ if not style in theme_logs:
187
+ theme_logs[style] = {}
188
+
189
+ theme_logs[style][sub_style] = log
190
+ curr_title_code = log[-1]['code']
191
+
192
+ theme_code[sections[0]] = curr_title_code
193
+ concatenated_code[sections[0]] = log[-1]['concatenated_code']
194
+
195
+ # Remaining sections
196
+
197
+ jinja_env = Environment(undefined=StrictUndefined)
198
+
199
+ section_actor_template = jinja_env.from_string(section_theme_actor_config["template"])
200
+
201
+ prev_section = None
202
+ for style_dict in outline_skeleton_list:
203
+ curr_subsection = list(style_dict.keys())[0]
204
+ curr_section = style_dict[curr_subsection]['section']
205
+ section_index = sections.index(curr_section)
206
+ print(f'Processing section {curr_section}')
207
+ if prev_section != curr_section:
208
+ prev_section = curr_section
209
+ curr_section_code = non_overlap_code[curr_section]
210
+ print(f' Applying theme for {curr_subsection}')
211
+ jinja_args = {
212
+ 'style_json': json.dumps({curr_subsection: style_dict[curr_subsection]['style']}, indent=4),
213
+ 'function_docs': documentation,
214
+ 'existing_code': curr_section_code
215
+ }
216
+ actor_prompt = section_actor_template.render(**jinja_args)
217
+ existing_code = concatenated_code[sections[section_index - 1]]
218
+ log = apply_theme(section_actor_agent, actor_prompt, args.max_retry, existing_code=existing_code)
219
+ if log[-1]['error'] is not None:
220
+ raise Exception(log[-1]['error'])
221
+
222
+ input_token, output_token = log[-1]['cumulative_tokens']
223
+ total_input_token += input_token
224
+ total_output_token += output_token
225
+ gen_input_token += input_token
226
+ gen_output_token += output_token
227
+
228
+ shutil.copy('poster.pptx', f'tmp/theme_poster_<{curr_section}>_<{curr_subsection}>.pptx')
229
+
230
+ if not style in theme_logs:
231
+ theme_logs[style] = {}
232
+
233
+ theme_logs[style][sub_style] = log
234
+ curr_section_code = log[-1]['code']
235
+
236
+ theme_code[curr_section] = curr_section_code
237
+ concatenated_code[curr_section] = log[-1]['concatenated_code']
238
+
239
+ ppt_to_images(f'poster.pptx', 'tmp/theme_preview')
240
+
241
+ result_dir = f'results/{args.poster_name}/{args.model_name}/{args.index}'
242
+ shutil.copy('poster.pptx', f'{result_dir}/theme_poster.pptx')
243
+ ppt_to_images(f'poster.pptx', f'{result_dir}/theme_poster_preview')
244
+
245
+
246
+ ckpt = {
247
+ 'theme_styles': theme_styles,
248
+ 'theme_logs': theme_logs,
249
+ 'theme_code': theme_code,
250
+ 'concatenated_code': concatenated_code,
251
+ 'total_input_token': total_input_token,
252
+ 'total_output_token': total_output_token,
253
+ 'extract_input_token': extract_input_token,
254
+ 'extract_output_token': extract_output_token,
255
+ 'gen_input_token': gen_input_token,
256
+ 'gen_output_token': gen_output_token
257
+ }
258
+
259
+ pkl.dump(ckpt, open(f'checkpoints/{args.model_name}_{args.poster_name}_theme_ckpt.pkl', 'wb'))
260
+
261
+ return total_input_token, total_output_token
262
+
263
+ if __name__ == '__main__':
264
+ parser = argparse.ArgumentParser()
265
+ parser.add_argument('--poster_name', type=str, default=None)
266
+ parser.add_argument('--model_name', type=str, default='4o')
267
+ parser.add_argument('--poster_path', type=str, required=True)
268
+ parser.add_argument('--index', type=int, default=0)
269
+ parser.add_argument('--template_path', type=str)
270
+ parser.add_argument('--max_retry', type=int, default=3)
271
+ args = parser.parse_args()
272
+
273
+ actor_config = get_agent_config(args.model_name)
274
+ critic_config = get_agent_config(args.model_name)
275
+
276
+ if args.poster_name is None:
277
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
278
+
279
+ input_token, output_token = poster_apply_theme(args, actor_config, critic_config)
280
+
281
+ print(f'Token consumption: {input_token} -> {output_token}')
Paper2Poster/PosterAgent/beamer_pipeline.py ADDED
@@ -0,0 +1,182 @@
1
+ import os
2
+ import json
3
+ import argparse
4
+ from typing import Dict, Any, List
5
+
6
+ # Import existing modules
7
+ from PosterAgent.gen_beamer_code import (
8
+ generate_beamer_poster_code,
9
+ convert_pptx_layout_to_beamer,
10
+ save_beamer_code,
11
+ compile_beamer_to_pdf
12
+ )
13
+ from PosterAgent.gen_pptx_code import generate_poster_code
14
+ from utils.wei_utils import run_code
15
+ from utils.theme_utils import get_default_theme, create_theme_with_alignment
16
+
17
+ def generate_beamer_poster(
18
+ panel_arrangement_inches: List[Dict[str, Any]],
19
+ text_arrangement_inches: List[Dict[str, Any]],
20
+ figure_arrangement_inches: List[Dict[str, Any]],
21
+ bullet_content: List[Dict[str, Any]],
22
+ poster_info: Dict[str, str],
23
+ args,
24
+ width_cm: float = 120,
25
+ height_cm: float = 90,
26
+ theme: str = "default"
27
+ ):
28
+ """
29
+ Generate Beamer poster instead of PowerPoint.
30
+
31
+ Args:
32
+ panel_arrangement_inches: Panel layout data
33
+ text_arrangement_inches: Text layout data
34
+ figure_arrangement_inches: Figure layout data
35
+ bullet_content: Content for text boxes
36
+ poster_info: Poster metadata (title, author, institute)
37
+ args: Command line arguments
38
+ width_cm: Poster width in centimeters
39
+ height_cm: Poster height in centimeters
40
+ theme: Beamer theme name
41
+ """
42
+ print("\n🎯 Generating Beamer poster code...", flush=True)
43
+
44
+ # Convert layout data to Beamer format
45
+ beamer_data = convert_pptx_layout_to_beamer({
46
+ 'text_arrangement': text_arrangement_inches,
47
+ 'figure_arrangement': figure_arrangement_inches
48
+ })
49
+
50
+ # Update poster info
51
+ beamer_data['poster_info'].update(poster_info)
52
+
53
+ # Generate Beamer code
54
+ beamer_code = generate_beamer_poster_code(
55
+ sections=beamer_data['sections'],
56
+ figures=beamer_data['figures'],
57
+ poster_info=beamer_data['poster_info'],
58
+ width_cm=width_cm,
59
+ height_cm=height_cm,
60
+ theme=theme
61
+ )
62
+
63
+ # Save Beamer code
64
+ tex_path = f'{args.tmp_dir}/poster.tex'
65
+ save_beamer_code(beamer_code, tex_path)
66
+
67
+ # Compile to PDF
68
+ print("\n📄 Compiling Beamer to PDF...", flush=True)
69
+ success = compile_beamer_to_pdf(tex_path, args.tmp_dir)
70
+
71
+ if not success:
72
+ raise RuntimeError('Error in compiling Beamer to PDF')
73
+
74
+ print(f"✅ Beamer poster generated successfully: {tex_path}")
75
+ return tex_path
76
+
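# --- Illustrative usage sketch (editorial, not part of the committed module) ---
# The layout lists, `demo_args`, and the content below are hypothetical stand-ins for
# what the earlier pipeline stages would produce; the call itself is left commented out
# because it compiles LaTeX and therefore needs pdflatex plus an existing args.tmp_dir.
#
# from types import SimpleNamespace
# demo_args = SimpleNamespace(tmp_dir='tmp')
# tex_path = generate_beamer_poster(
#     panel_arrangement_inches=[],
#     text_arrangement_inches=[{'textbox_name': 'intro', 'title': 'Introduction',
#                               'content': 'One-sentence summary of the paper.'}],
#     figure_arrangement_inches=[],
#     bullet_content=[],
#     poster_info={'title': 'Demo Poster', 'author': 'A. Author', 'institute': 'Some Lab'},
#     args=demo_args,
#     width_cm=120, height_cm=90, theme='default',
# )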
77
+ def modify_new_pipeline_for_beamer(args):
78
+ """
79
+ Modified version of new_pipeline.py to support Beamer output.
80
+ This function replaces the PowerPoint generation part with Beamer generation.
81
+ """
82
+ # Import the original pipeline components
83
+ from PosterAgent.new_pipeline import (
84
+ parse_paper_content,
85
+ gen_outline_layout_parallel,
86
+ gen_poster_content,
87
+ deoverflow_parallel,
88
+ apply_theme
89
+ )
90
+
91
+ # ... (keep all the existing pipeline steps until poster generation; those steps are assumed to produce panel_arrangement_inches, text_arrangement_inches, figure_arrangement_inches and bullet_content used below)
92
+
93
+ # At the poster generation step, replace PowerPoint with Beamer:
94
+
95
+ # === Beamer Poster Generation ===
96
+ print("\n🎯 Generating Beamer poster...", flush=True)
97
+
98
+ # Extract poster information from content
99
+ poster_info = {
100
+ 'title': 'Research Poster Title', # Extract from paper content
101
+ 'author': 'Author Name', # Extract from paper content
102
+ 'institute': 'Institute Name' # Extract from paper content
103
+ }
104
+
105
+ # Convert inches to centimeters (1 inch = 2.54 cm)
106
+ width_cm = args.poster_width_inches * 2.54
107
+ height_cm = args.poster_height_inches * 2.54
108
+
109
+ # Generate Beamer poster
110
+ tex_path = generate_beamer_poster(
111
+ panel_arrangement_inches=panel_arrangement_inches,
112
+ text_arrangement_inches=text_arrangement_inches,
113
+ figure_arrangement_inches=figure_arrangement_inches,
114
+ bullet_content=bullet_content,
115
+ poster_info=poster_info,
116
+ args=args,
117
+ width_cm=width_cm,
118
+ height_cm=height_cm,
119
+ theme=getattr(args, 'beamer_theme', 'default')
120
+ )
121
+
122
+ # Copy output to final directory
123
+ output_dir = f'<{args.model_name_t}_{args.model_name_v}>_generated_posters/{args.poster_path.replace("paper.pdf", "")}'
124
+ os.makedirs(output_dir, exist_ok=True)
125
+
126
+ # Copy generated files
127
+ import shutil
128
+ shutil.copy(tex_path, f'{output_dir}/poster.tex')
129
+ shutil.copy(f'{args.tmp_dir}/poster.pdf', f'{output_dir}/poster.pdf')
130
+
131
+ print(f"✅ Beamer poster saved to: {output_dir}")
132
+ return output_dir
133
+
134
+ def add_beamer_arguments(parser):
135
+ """Add Beamer-specific command line arguments."""
136
+ parser.add_argument(
137
+ '--output_format',
138
+ choices=['pptx', 'beamer'],
139
+ default='pptx',
140
+ help='Output format: pptx (PowerPoint) or beamer (LaTeX)'
141
+ )
142
+ parser.add_argument(
143
+ '--beamer_theme',
144
+ default='default',
145
+ help='Beamer theme name (default, Madrid, Warsaw, etc.)'
146
+ )
147
+ parser.add_argument(
148
+ '--beamer_width_cm',
149
+ type=float,
150
+ default=120,
151
+ help='Beamer poster width in centimeters'
152
+ )
153
+ parser.add_argument(
154
+ '--beamer_height_cm',
155
+ type=float,
156
+ default=90,
157
+ help='Beamer poster height in centimeters'
158
+ )
159
+ return parser
160
+
161
+ # Example integration with existing pipeline
162
+ def integrate_beamer_with_existing_pipeline():
163
+ """
164
+ Example of how to integrate Beamer generation with the existing pipeline.
165
+ """
166
+ # This would be added to the main pipeline function
167
+ pass
168
+
169
+ if __name__ == "__main__":
170
+ parser = argparse.ArgumentParser(description='Generate Beamer poster from paper')
171
+ parser = add_beamer_arguments(parser)
172
+
173
+ # Add other existing arguments...
174
+
175
+ args = parser.parse_args()
176
+
177
+ if args.output_format == 'beamer':
178
+ modify_new_pipeline_for_beamer(args)
179
+ else:
180
+ # Use original PowerPoint pipeline
181
+ pass
182
+
Paper2Poster/PosterAgent/create_dataset.py ADDED
@@ -0,0 +1,69 @@
1
+ from datasets import load_dataset
2
+ import os
3
+ import subprocess
4
+
5
+ from PIL import Image
6
+ import json
7
+
8
+ def generate_meta_json(base_dir='Paper2Poster-data'):
9
+ # Loop over each item in the specified base directory
10
+ for folder_name in os.listdir(base_dir):
11
+ subfolder_path = os.path.join(base_dir, folder_name)
12
+
13
+ # Ensure the item is a directory
14
+ if os.path.isdir(subfolder_path):
15
+ poster_path = os.path.join(subfolder_path, 'poster.png')
16
+
17
+ # Check if the poster.png exists in the subfolder
18
+ if os.path.exists(poster_path):
19
+ try:
20
+ # Open the image and get size (width, height)
21
+ with Image.open(poster_path) as img:
22
+ width, height = img.size
23
+
24
+ # Prepare metadata dictionary
25
+ metadata = {
26
+ 'width': width,
27
+ 'height': height
28
+ }
29
+
30
+ # Write metadata to meta.json in the same subfolder
31
+ meta_json_path = os.path.join(subfolder_path, 'meta.json')
32
+ with open(meta_json_path, 'w') as json_file:
33
+ json.dump(metadata, json_file)
34
+
35
+ print(f"Metadata for '{folder_name}' saved successfully.")
36
+ except Exception as e:
37
+ print(f"Error processing image in folder '{folder_name}': {e}")
38
+ else:
39
+ print(f"No poster.png found in folder '{folder_name}'.")
40
+
41
+ if __name__ == "__main__":
42
+ dataset = load_dataset("Paper2Poster/Paper2Poster", split="train")
43
+ os.makedirs('Paper2Poster-data', exist_ok=True)
44
+ for data in dataset:
45
+ paper_title = data['title']
46
+ paper_url = data['paper_url']
47
+ poster_url = data['image_url']
48
+ qa = data['qa']
49
+
50
+ os.makedirs(f'Paper2Poster-data/{paper_title}', exist_ok=True)
51
+
52
+ paper_output_path = os.path.join('Paper2Poster-data', paper_title, 'paper.pdf')
53
+ poster_output_path = os.path.join('Paper2Poster-data', paper_title, 'poster.png')
54
+ qa_path = os.path.join('Paper2Poster-data', paper_title, 'o3_qa.json')
55
+
56
+ qa_dict = json.loads(qa)
57
+ with open(qa_path, 'w') as f:
58
+ json.dump(qa_dict, f, indent=4)
59
+ print(f"Saved QA for {paper_title} into {qa_path}")
60
+
61
+ try:
62
+ subprocess.run(['wget', paper_url, '-O', paper_output_path], check=True)
63
+ subprocess.run(['wget', poster_url, '-O', poster_output_path], check=True)
64
+ print(f"Downloaded {poster_url} into {poster_output_path}")
65
+ print(f"Downloaded {paper_url} into {paper_output_path}")
66
+ except subprocess.CalledProcessError as e:
67
+ print(f"Error downloading {paper_url} or {poster_url}: {e}")
68
+
69
+ generate_meta_json('Paper2Poster-data')
Paper2Poster/PosterAgent/deoverflow.py ADDED
@@ -0,0 +1,234 @@
1
+ from dotenv import load_dotenv
2
+ from utils.src.utils import ppt_to_images, get_json_from_response
3
+ import json
4
+
5
+ from camel.models import ModelFactory
6
+ from camel.agents import ChatAgent
7
+
8
+ from utils.wei_utils import *
9
+
10
+ from camel.messages import BaseMessage
11
+ from PIL import Image
12
+ import pickle as pkl
13
+ from utils.pptx_utils import *
14
+ from utils.critic_utils import *
15
+ import yaml
16
+ import argparse
17
+ import shutil
18
+ from jinja2 import Environment, StrictUndefined
19
+
20
+ load_dotenv()
21
+
22
+ MAX_ATTEMPTS = 5
23
+
24
+ def deoverflow(args, actor_config, critic_config):
25
+ total_input_token, total_output_token = 0, 0
26
+ style_ckpt = pkl.load(open(f'checkpoints/{args.model_name}_{args.poster_name}_style_ckpt_{args.index}.pkl', 'rb'))
27
+ logs_ckpt = pkl.load(open(f'checkpoints/{args.model_name}_{args.poster_name}_ckpt_{args.index}.pkl', 'rb'))
28
+
29
+ style_logs = style_ckpt['style_logs']
30
+ sections = list(style_logs.keys())
31
+ sections = [s for s in sections if s != 'meta']
32
+
33
+ slide_width = style_ckpt['outline']['meta']['width']
34
+ slide_height = style_ckpt['outline']['meta']['height']
35
+
36
+ content = json.load(open(f'contents/{args.model_name}_{args.poster_name}_poster_content_{args.index}.json', 'r'))
37
+ outline = logs_ckpt['outline']
38
+
39
+ name_to_hierarchy = get_hierarchy(outline, 1)
40
+
41
+ critic_agent_name = 'critic_overlap_agent'
42
+ with open(f"prompt_templates/{critic_agent_name}.yaml", "r") as f:
43
+ deoverflow_critic_config = yaml.safe_load(f)
44
+
45
+ actor_agent_name = 'actor_editor_agent'
46
+
47
+ with open(f"prompt_templates/{actor_agent_name}.yaml", "r") as f:
48
+ deoverflow_actor_config = yaml.safe_load(f)
49
+
50
+ actor_model = ModelFactory.create(
51
+ model_platform=actor_config['model_platform'],
52
+ model_type=actor_config['model_type'],
53
+ model_config_dict=actor_config['model_config'],
54
+ )
55
+
56
+ actor_sys_msg = deoverflow_actor_config['system_prompt']
57
+
58
+ actor_agent = ChatAgent(
59
+ system_message=actor_sys_msg,
60
+ model=actor_model,
61
+ message_window_size=10,
62
+ )
63
+
64
+ critic_model = ModelFactory.create(
65
+ model_platform=critic_config['model_platform'],
66
+ model_type=critic_config['model_type'],
67
+ model_config_dict=critic_config['model_config'],
68
+ )
69
+
70
+ critic_sys_msg = deoverflow_critic_config['system_prompt']
71
+
72
+ critic_agent = ChatAgent(
73
+ system_message=critic_sys_msg,
74
+ model=critic_model,
75
+ message_window_size=None,
76
+ )
77
+
78
+ jinja_env = Environment(undefined=StrictUndefined)
79
+
80
+ actor_template = jinja_env.from_string(deoverflow_actor_config["template"])
81
+ critic_template = jinja_env.from_string(deoverflow_critic_config["template"])
82
+
83
+ critic_logs = {}
84
+ actor_logs = {}
85
+ img_logs = {}
86
+
87
+ # Load neg and pos examples
88
+ neg_img = Image.open('overflow_example/neg.jpg')
89
+ pos_img = Image.open('overflow_example/pos.jpg')
90
+
91
+ for section_index in range(len(sections)):
92
+ section_name = sections[section_index]
93
+ section_code = style_logs[section_name][-1]['code']
94
+
95
+ if 'subsections' in content[section_name]:
96
+ subsections = list(content[section_name]['subsections'].keys())
97
+ else:
98
+ subsections = [section_name]
99
+
100
+ log = []
101
+
102
+ for leaf_section in subsections:
103
+ if leaf_section in outline:
104
+ leaf_name = outline[leaf_section]['name']
105
+ else:
106
+ leaf_name = outline[section_name]['subsections'][leaf_section]['name']
107
+ num_rounds = 0
108
+ while True:
109
+ print(f"Section: {section_name}, Leaf Section: {leaf_section}, Round: {num_rounds}")
110
+ num_rounds += 1
111
+ if num_rounds > MAX_ATTEMPTS:
112
+ break
113
+ poster = create_poster(slide_width, slide_height)
114
+ add_blank_slide(poster)
115
+ save_presentation(poster, file_name='poster.pptx')
116
+ curr_location, zoomed_in_img, zoomed_in_img_path = get_snapshot_from_section(
117
+ leaf_section,
118
+ section_name,
119
+ name_to_hierarchy,
120
+ leaf_name,
121
+ section_code
122
+ )
123
+
124
+ if not leaf_section in img_logs:
125
+ img_logs[leaf_section] = []
126
+ img_logs[leaf_section].append(zoomed_in_img)
127
+
128
+ jinja_args = {
129
+ 'content_json': content[leaf_section] if leaf_section in content else content[section_name]['subsections'][leaf_section],
130
+ 'existing_code': section_code,
131
+ }
132
+
133
+ critic_prompt = critic_template.render(**jinja_args)
134
+
135
+ critic_msg = BaseMessage.make_user_message(
136
+ role_name="User",
137
+ content=critic_prompt,
138
+ image_list=[neg_img, pos_img, zoomed_in_img],
139
+ )
140
+
141
+ critic_agent.reset()
142
+ response = critic_agent.step(critic_msg)
143
+ resp = response.msgs[0].content
144
+ input_token, output_token = account_token(response)
145
+ total_input_token += input_token
146
+ total_output_token += output_token
147
+ if not leaf_section in critic_logs:
148
+ critic_logs[leaf_section] = []
149
+
150
+ critic_logs[leaf_section].append(response)
151
+
152
+ if type(resp) == str:
153
+ if resp in ['NO', 'NO.', '"NO"', "'NO'"]:
154
+ break
155
+
156
+ feedback = get_json_from_response(resp)
157
+ print(feedback)
158
+ jinja_args = {
159
+ 'content_json': content[leaf_section] if leaf_section in content else content[section_name]['subsections'][leaf_section],
160
+ 'function_docs': documentation,
161
+ 'existing_code': section_code,
162
+ 'suggestion_json': feedback,
163
+ }
164
+
165
+ actor_prompt = actor_template.render(**jinja_args)
166
+
167
+ log = edit_code(actor_agent, actor_prompt, 3, existing_code='')
168
+ if log[-1]['error'] is not None:
169
+ raise Exception(log[-1]['error'])
170
+
171
+ input_token = log[-1]['cumulative_tokens'][0]
172
+ output_token = log[-1]['cumulative_tokens'][1]
173
+ total_input_token += input_token
174
+ total_output_token += output_token
175
+
176
+ section_code = log[-1]['code']
177
+
178
+ if not leaf_section in actor_logs:
179
+ actor_logs[leaf_section] = []
180
+
181
+ actor_logs[leaf_section].append(log)
182
+ if len(log) > 0:
183
+ style_logs[section_name].append(log[-1])
184
+
185
+ final_code = ''
186
+ for section in sections:
187
+ final_code += style_logs[section][-1]['code'] + '\n'
188
+
189
+ run_code_with_utils(final_code, utils_functions)
190
+ ppt_to_images(f'poster.pptx', 'tmp/non_overlap_preview')
191
+
192
+ result_dir = f'results/{args.poster_name}/{args.model_name}/{args.index}'
193
+ if not os.path.exists(result_dir):
194
+ os.makedirs(result_dir)
195
+ shutil.copy('poster.pptx', f'{result_dir}/non_overlap_poster.pptx')
196
+ ppt_to_images(f'poster.pptx', f'{result_dir}/non_overlap_poster_preview')
197
+
198
+ final_code_by_section = {}
199
+ for section in sections:
200
+ final_code_by_section[section] = style_logs[section][-1]['code']
201
+
202
+ non_overlap_ckpt = {
203
+ 'critic_logs': critic_logs,
204
+ 'actor_logs': actor_logs,
205
+ 'img_logs': img_logs,
206
+ 'name_to_hierarchy': name_to_hierarchy,
207
+ 'final_code': final_code,
208
+ 'final_code_by_section': final_code_by_section,
209
+ 'total_input_token': total_input_token,
210
+ 'total_output_token': total_output_token
211
+ }
212
+
213
+ pkl.dump(non_overlap_ckpt, open(f'checkpoints/{args.model_name}_{args.poster_name}_non_overlap_ckpt_{args.index}.pkl', 'wb'))
214
+
215
+ return total_input_token, total_output_token
216
+
217
+ if __name__ == '__main__':
218
+ parser = argparse.ArgumentParser()
219
+ parser.add_argument('--poster_name', type=str, default=None)
220
+ parser.add_argument('--model_name', type=str, default='4o')
221
+ parser.add_argument('--poster_path', type=str, required=True)
222
+ parser.add_argument('--index', type=int, default=0)
223
+ parser.add_argument('--max_retry', type=int, default=3)
224
+ args = parser.parse_args()
225
+
226
+ actor_config = get_agent_config(args.model_name)
227
+ critic_config = get_agent_config(args.model_name)
228
+
229
+ if args.poster_name is None:
230
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
231
+
232
+ input_token, output_token = deoverflow(args, actor_config, critic_config)
233
+
234
+ print(f'Token consumption: {input_token} -> {output_token}')
Paper2Poster/PosterAgent/deoverflow_parallel.py ADDED
@@ -0,0 +1,485 @@
1
+ from dotenv import load_dotenv
2
+ from utils.src.utils import ppt_to_images, get_json_from_response
3
+ import json
4
+
5
+ from camel.models import ModelFactory
6
+ from camel.agents import ChatAgent
7
+
8
+ from utils.wei_utils import *
9
+
10
+ from camel.messages import BaseMessage
11
+ from PIL import Image
12
+ import pickle as pkl
13
+ from utils.pptx_utils import *
14
+ from utils.critic_utils import *
15
+ import yaml
16
+ import argparse
17
+ import shutil
18
+ from jinja2 import Environment, StrictUndefined
19
+ from concurrent.futures import ThreadPoolExecutor
20
+ import copy
21
+
22
+ load_dotenv()
23
+
24
+ MAX_ATTEMPTS = 5
25
+
26
+ def process_leaf_section(
27
+ leaf_section,
28
+ section_name,
29
+ outline,
30
+ content,
31
+ style_logs,
32
+ critic_logs,
33
+ actor_logs,
34
+ img_logs,
35
+ slide_width,
36
+ slide_height,
37
+ name_to_hierarchy,
38
+ critic_template,
39
+ actor_template,
40
+ critic_agent,
41
+ actor_agent,
42
+ neg_img,
43
+ pos_img,
44
+ MAX_ATTEMPTS,
45
+ documentation,
46
+ total_input_token,
47
+ total_output_token,
48
+ ):
49
+ """
50
+ Handles the logic for a single leaf_section within a section_name.
51
+
52
+ Returns a dictionary of updated logs and tokens.
53
+ """
54
+ section_code = style_logs[section_name][-1]['code'] # current code for this section
55
+ log = []
56
+ leaf_name = None
57
+ if leaf_section in outline:
58
+ leaf_name = outline[leaf_section]['name']
59
+ else:
60
+ leaf_name = outline[section_name]['subsections'][leaf_section]['name']
61
+
62
+ num_rounds = 0
63
+ while True:
64
+ print(f"Section: {section_name}, Leaf Section: {leaf_section}, Round: {num_rounds}")
65
+ num_rounds += 1
66
+ if num_rounds > MAX_ATTEMPTS:
67
+ break
68
+
69
+ poster = create_poster(slide_width, slide_height)
70
+ add_blank_slide(poster)
71
+ empty_poster_path = f'tmp/empty_poster_{section_name}_{leaf_section}.pptx'
72
+ save_presentation(poster, file_name=empty_poster_path)
73
+
74
+ curr_location, zoomed_in_img, zoomed_in_img_path = get_snapshot_from_section(
75
+ leaf_section,
76
+ section_name,
77
+ name_to_hierarchy,
78
+ leaf_name,
79
+ section_code,
80
+ empty_poster_path
81
+ )
82
+
83
+ if leaf_section not in img_logs:
84
+ img_logs[leaf_section] = []
85
+ img_logs[leaf_section].append(zoomed_in_img)
86
+
87
+ jinja_args = {
88
+ 'content_json': content[leaf_section] if leaf_section in content
89
+ else content[section_name]['subsections'][leaf_section],
90
+ 'existing_code': section_code,
91
+ }
92
+
93
+ critic_prompt = critic_template.render(**jinja_args)
94
+
95
+ critic_msg = BaseMessage.make_user_message(
96
+ role_name="User",
97
+ content=critic_prompt,
98
+ image_list=[neg_img, pos_img, zoomed_in_img],
99
+ )
100
+
101
+ critic_agent.reset()
102
+ response = critic_agent.step(critic_msg)
103
+ resp = response.msgs[0].content
104
+
105
+ # Track tokens
106
+ input_token, output_token = account_token(response)
107
+ total_input_token += input_token
108
+ total_output_token += output_token
109
+
110
+ if leaf_section not in critic_logs:
111
+ critic_logs[leaf_section] = []
112
+ critic_logs[leaf_section].append(response)
113
+
114
+ # Stop condition
115
+ if isinstance(resp, str):
116
+ if resp in ['NO', 'NO.', '"NO"', "'NO'"]:
117
+ break
118
+
119
+ feedback = get_json_from_response(resp)
120
+ print(feedback)
121
+
122
+ jinja_args = {
123
+ 'content_json': content[leaf_section] if leaf_section in content
124
+ else content[section_name]['subsections'][leaf_section],
125
+ 'function_docs': documentation,
126
+ 'existing_code': section_code,
127
+ 'suggestion_json': feedback,
128
+ }
129
+
130
+ actor_prompt = actor_template.render(**jinja_args)
131
+
132
+ leaf_log = edit_code(actor_agent, actor_prompt, 3, existing_code='')
133
+ if leaf_log[-1]['error'] is not None:
134
+ raise Exception(leaf_log[-1]['error'])
135
+
136
+ # Track tokens
137
+ in_tok = leaf_log[-1]['cumulative_tokens'][0]
138
+ out_tok = leaf_log[-1]['cumulative_tokens'][1]
139
+ total_input_token += in_tok
140
+ total_output_token += out_tok
141
+
142
+ section_code = leaf_log[-1]['code']
143
+
144
+ if leaf_section not in actor_logs:
145
+ actor_logs[leaf_section] = []
146
+ actor_logs[leaf_section].append(leaf_log)
147
+
148
+ log.extend(leaf_log)
149
+
150
+ return {
151
+ "section_code": section_code,
152
+ "log": log,
153
+ "img_logs": img_logs,
154
+ "critic_logs": critic_logs,
155
+ "actor_logs": actor_logs,
156
+ "total_input_token": total_input_token,
157
+ "total_output_token": total_output_token,
158
+ }
159
+
160
+
161
+ def process_section(
162
+ section_name,
163
+ content,
164
+ outline,
165
+ sections,
166
+ style_logs,
167
+ critic_logs,
168
+ actor_logs,
169
+ img_logs,
170
+ slide_width,
171
+ slide_height,
172
+ name_to_hierarchy,
173
+ critic_template,
174
+ actor_template,
175
+ critic_agent,
176
+ actor_agent,
177
+ neg_img,
178
+ pos_img,
179
+ MAX_ATTEMPTS,
180
+ documentation,
181
+ total_input_token,
182
+ total_output_token,
183
+ ):
184
+ """
185
+ Handles processing of a single section and its subsections (leaf sections).
186
+ Returns updated logs and token counters for this section.
187
+ """
188
+ results_per_leaf = []
189
+
190
+ # Grab the current code for this section
191
+ section_code = style_logs[section_name][-1]['code']
192
+
193
+ # Determine which leaf sections to process
194
+ if 'subsections' in content[section_name]:
195
+ subsections = list(content[section_name]['subsections'].keys())
196
+ else:
197
+ subsections = [section_name]
198
+
199
+ all_logs_for_section = []
200
+
201
+ for leaf_section in subsections:
202
+ # Process this leaf section
203
+ leaf_result = process_leaf_section(
204
+ leaf_section,
205
+ section_name,
206
+ outline,
207
+ content,
208
+ style_logs,
209
+ critic_logs,
210
+ actor_logs,
211
+ img_logs,
212
+ slide_width,
213
+ slide_height,
214
+ name_to_hierarchy,
215
+ critic_template,
216
+ actor_template,
217
+ critic_agent,
218
+ actor_agent,
219
+ neg_img,
220
+ pos_img,
221
+ MAX_ATTEMPTS,
222
+ documentation,
223
+ total_input_token,
224
+ total_output_token,
225
+ )
226
+
227
+ # Update logs/tokens
228
+ section_code = leaf_result["section_code"]
229
+ all_logs_for_section.extend(leaf_result["log"])
230
+ img_logs = leaf_result["img_logs"]
231
+ critic_logs = leaf_result["critic_logs"]
232
+ actor_logs = leaf_result["actor_logs"]
233
+ total_input_token = leaf_result["total_input_token"]
234
+ total_output_token = leaf_result["total_output_token"]
235
+
236
+ # If we have any logs from the last leaf in this section, append them
237
+ if all_logs_for_section:
238
+ style_logs[section_name].append(all_logs_for_section[-1])
239
+
240
+ # Return updated state for merging back in the main thread
241
+ return {
242
+ "section_name": section_name,
243
+ "style_logs": style_logs,
244
+ "critic_logs": critic_logs,
245
+ "actor_logs": actor_logs,
246
+ "img_logs": img_logs,
247
+ "total_input_token": total_input_token,
248
+ "total_output_token": total_output_token
249
+ }
250
+
251
+ def parallel_by_sections(
252
+ sections,
253
+ content,
254
+ outline,
255
+ style_logs,
256
+ critic_logs,
257
+ actor_logs,
258
+ img_logs,
259
+ slide_width,
260
+ slide_height,
261
+ name_to_hierarchy,
262
+ critic_template,
263
+ actor_template,
264
+ critic_agent,
265
+ actor_agent,
266
+ neg_img,
267
+ pos_img,
268
+ MAX_ATTEMPTS,
269
+ documentation,
270
+ total_input_token,
271
+ total_output_token,
272
+ max_workers=4
273
+ ):
274
+ """
275
+ Main entry point to parallelize processing across sections.
276
+
277
+ Returns the merged logs and token counters after processing all sections in parallel.
278
+ """
279
+ # Because we’ll be modifying dictionaries (like style_logs, etc.),
280
+ # it can be safer to create a copy for the workers, then merge results
281
+ # after. (Below is a simple approach—depending on your scale, consider
282
+ # explicit concurrency controls or a database-backed approach.)
283
+
284
+ # Summaries from each future
285
+ results = []
286
+
287
+ # We’ll store fresh copies for each section to avoid concurrency collisions
288
+ # on dictionary updates. If the data is large, you might want a more
289
+ # sophisticated synchronization or partition approach rather than naive copies.
290
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
291
+ futures = []
292
+
293
+ for section_name in sections:
294
+ # Make shallow copies or deep copies of logs
295
+ _style_logs = copy.deepcopy(style_logs)
296
+ _critic_logs = copy.deepcopy(critic_logs)
297
+ _actor_logs = copy.deepcopy(actor_logs)
298
+ _img_logs = copy.deepcopy(img_logs)
299
+
300
+ futures.append(executor.submit(
301
+ process_section,
302
+ section_name,
303
+ content,
304
+ outline,
305
+ sections,
306
+ _style_logs,
307
+ _critic_logs,
308
+ _actor_logs,
309
+ _img_logs,
310
+ slide_width,
311
+ slide_height,
312
+ name_to_hierarchy,
313
+ critic_template,
314
+ actor_template,
315
+ critic_agent,
316
+ actor_agent,
317
+ neg_img,
318
+ pos_img,
319
+ MAX_ATTEMPTS,
320
+ documentation,
321
+ total_input_token,
322
+ total_output_token
323
+ ))
324
+
325
+ for future in futures:
326
+ results.append(future.result())
327
+
328
+ # The code below merges the results. The method of merging depends on how
329
+ # you prefer to aggregate. For a minimal approach, we’ll pick the logs from
330
+ # each section, then overwrite or update them in the main dictionaries.
331
+
332
+ for res in results:
333
+ sec_name = res["section_name"]
334
+ # Overwrite or merge logs as needed
335
+ style_logs[sec_name] = res["style_logs"][sec_name]
336
+ critic_logs.update(res["critic_logs"])
337
+ actor_logs.update(res["actor_logs"])
338
+ img_logs.update(res["img_logs"])
339
+ total_input_token = res["total_input_token"]
340
+ total_output_token = res["total_output_token"]
341
+
342
+ return style_logs, critic_logs, actor_logs, img_logs, total_input_token, total_output_token
343
+
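# --- Minimal sketch of the copy-then-merge pattern used above (editorial; the real call
# passes the agents, templates and checkpoint logs assembled in deoverflow() below) ---
from concurrent.futures import ThreadPoolExecutor as _TPE
import copy as _copy

def _demo_worker(section, logs):
    # Each worker mutates only its own deep copy of the shared logs.
    logs.setdefault(section, []).append(f"processed {section}")
    return {"section_name": section, "logs": logs}

_shared = {}
with _TPE(max_workers=2) as _ex:
    _futs = [_ex.submit(_demo_worker, s, _copy.deepcopy(_shared)) for s in ["Intro", "Method"]]
    for _f in _futs:
        _r = _f.result()
        # Merge each worker's result back into the shared dictionary.
        _shared[_r["section_name"]] = _r["logs"][_r["section_name"]]
# _shared == {"Intro": ["processed Intro"], "Method": ["processed Method"]}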
344
+
345
+ def deoverflow(args, actor_config, critic_config):
346
+ total_input_token, total_output_token = 0, 0
347
+ style_ckpt = pkl.load(open(f'checkpoints/{args.model_name}_{args.poster_name}_style_ckpt_{args.index}.pkl', 'rb'))
348
+ logs_ckpt = pkl.load(open(f'checkpoints/{args.model_name}_{args.poster_name}_ckpt_{args.index}.pkl', 'rb'))
349
+
350
+ style_logs = style_ckpt['style_logs']
351
+ sections = list(style_logs.keys())
352
+ sections = [s for s in sections if s != 'meta']
353
+
354
+ slide_width = style_ckpt['outline']['meta']['width']
355
+ slide_height = style_ckpt['outline']['meta']['height']
356
+
357
+ content = json.load(open(f'contents/{args.model_name}_{args.poster_name}_poster_content_{args.index}.json', 'r'))
358
+ outline = logs_ckpt['outline']
359
+
360
+ name_to_hierarchy = get_hierarchy(outline, 1)
361
+
362
+ critic_agent_name = 'critic_overlap_agent'
363
+ with open(f"prompt_templates/{critic_agent_name}.yaml", "r") as f:
364
+ deoverflow_critic_config = yaml.safe_load(f)
365
+
366
+ actor_agent_name = 'actor_editor_agent'
367
+
368
+ with open(f"prompt_templates/{actor_agent_name}.yaml", "r") as f:
369
+ deoverflow_actor_config = yaml.safe_load(f)
370
+
371
+ actor_model = ModelFactory.create(
372
+ model_platform=actor_config['model_platform'],
373
+ model_type=actor_config['model_type'],
374
+ model_config_dict=actor_config['model_config'],
375
+ )
376
+
377
+ actor_sys_msg = deoverflow_actor_config['system_prompt']
378
+
379
+ actor_agent = ChatAgent(
380
+ system_message=actor_sys_msg,
381
+ model=actor_model,
382
+ message_window_size=10,
383
+ )
384
+
385
+ critic_model = ModelFactory.create(
386
+ model_platform=critic_config['model_platform'],
387
+ model_type=critic_config['model_type'],
388
+ model_config_dict=critic_config['model_config'],
389
+ )
390
+
391
+ critic_sys_msg = deoverflow_critic_config['system_prompt']
392
+
393
+ critic_agent = ChatAgent(
394
+ system_message=critic_sys_msg,
395
+ model=critic_model,
396
+ message_window_size=None,
397
+ )
398
+
399
+ jinja_env = Environment(undefined=StrictUndefined)
400
+
401
+ actor_template = jinja_env.from_string(deoverflow_actor_config["template"])
402
+ critic_template = jinja_env.from_string(deoverflow_critic_config["template"])
403
+
404
+ critic_logs = {}
405
+ actor_logs = {}
406
+ img_logs = {}
407
+
408
+ # Load neg and pos examples
409
+ neg_img = Image.open('overflow_example/neg.jpg')
410
+ pos_img = Image.open('overflow_example/pos.jpg')
411
+
412
+ style_logs, critic_logs, actor_logs, img_logs, total_input_token, total_output_token = parallel_by_sections(
413
+ sections=sections,
414
+ content=content,
415
+ outline=outline,
416
+ style_logs=style_logs,
417
+ critic_logs=critic_logs,
418
+ actor_logs=actor_logs,
419
+ img_logs=img_logs,
420
+ slide_width=slide_width,
421
+ slide_height=slide_height,
422
+ name_to_hierarchy=name_to_hierarchy,
423
+ critic_template=critic_template,
424
+ actor_template=actor_template,
425
+ critic_agent=critic_agent,
426
+ actor_agent=actor_agent,
427
+ neg_img=neg_img,
428
+ pos_img=pos_img,
429
+ MAX_ATTEMPTS=MAX_ATTEMPTS,
430
+ documentation=documentation,
431
+ total_input_token=total_input_token,
432
+ total_output_token=total_output_token,
433
+ max_workers=100, # or however many worker threads you want
434
+ )
435
+
436
+ final_code = ''
437
+ for section in sections:
438
+ final_code += style_logs[section][-1]['code'] + '\n'
439
+
440
+ run_code_with_utils(final_code, utils_functions)
441
+ ppt_to_images(f'poster.pptx', 'tmp/non_overlap_preview')
442
+
443
+ result_dir = f'results/{args.poster_name}/{args.model_name}/{args.index}'
444
+ if not os.path.exists(result_dir):
445
+ os.makedirs(result_dir)
446
+ shutil.copy('poster.pptx', f'{result_dir}/non_overlap_poster.pptx')
447
+ ppt_to_images(f'poster.pptx', f'{result_dir}/non_overlap_poster_preview')
448
+
449
+ final_code_by_section = {}
450
+ for section in sections:
451
+ final_code_by_section[section] = style_logs[section][-1]['code']
452
+
453
+ non_overlap_ckpt = {
454
+ 'critic_logs': critic_logs,
455
+ 'actor_logs': actor_logs,
456
+ 'img_logs': img_logs,
457
+ 'name_to_hierarchy': name_to_hierarchy,
458
+ 'final_code': final_code,
459
+ 'final_code_by_section': final_code_by_section,
460
+ 'total_input_token': total_input_token,
461
+ 'total_output_token': total_output_token
462
+ }
463
+
464
+ pkl.dump(non_overlap_ckpt, open(f'checkpoints/{args.model_name}_{args.poster_name}_non_overlap_ckpt_{args.index}.pkl', 'wb'))
465
+
466
+ return total_input_token, total_output_token
467
+
468
+ if __name__ == '__main__':
469
+ parser = argparse.ArgumentParser()
470
+ parser.add_argument('--poster_name', type=str, default=None)
471
+ parser.add_argument('--model_name', type=str, default='4o')
472
+ parser.add_argument('--poster_path', type=str, required=True)
473
+ parser.add_argument('--index', type=int, default=0)
474
+ parser.add_argument('--max_retry', type=int, default=3)
475
+ args = parser.parse_args()
476
+
477
+ actor_config = get_agent_config(args.model_name)
478
+ critic_config = get_agent_config(args.model_name)
479
+
480
+ if args.poster_name is None:
481
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
482
+
483
+ input_token, output_token = deoverflow(args, actor_config, critic_config)
484
+
485
+ print(f'Token consumption: {input_token} -> {output_token}')
Paper2Poster/PosterAgent/fill_and_style.py ADDED
@@ -0,0 +1,215 @@
1
+ from dotenv import load_dotenv
2
+ import os
3
+ from utils.src.utils import ppt_to_images, get_json_from_response
4
+ import json
5
+ import pptx
6
+
7
+ from camel.models import ModelFactory
8
+ from camel.types import ModelPlatformType, ModelType
9
+ from camel.configs import ChatGPTConfig, QwenConfig
10
+ from camel.agents import ChatAgent
11
+
12
+ from utils.wei_utils import fill_content
13
+
14
+ from camel.messages import BaseMessage
15
+ from PIL import Image
16
+ import pickle as pkl
17
+ from utils.pptx_utils import *
18
+ from utils.critic_utils import *
19
+ from utils.wei_utils import *
20
+ import importlib
21
+ import yaml
22
+ import os
23
+ import shutil
24
+ from datetime import datetime
25
+ from jinja2 import Environment, StrictUndefined, Template
26
+ import argparse
27
+
28
+ load_dotenv()
29
+
30
+ def fill_poster_content(args, actor_config):
31
+ total_input_token, total_output_token = 0, 0
32
+ poster_content = json.load(open(f'contents/{args.model_name}_{args.poster_name}_poster_content_{args.index}.json', 'r'))
33
+ agent_name = 'content_filler_agent'
34
+
35
+ with open(f"prompt_templates/{agent_name}.yaml", "r") as f:
36
+ fill_config = yaml.safe_load(f)
37
+
38
+ actor_model = ModelFactory.create(
39
+ model_platform=actor_config['model_platform'],
40
+ model_type=actor_config['model_type'],
41
+ model_config_dict=actor_config['model_config'],
42
+ )
43
+
44
+ actor_sys_msg = fill_config['system_prompt']
45
+
46
+ actor_agent = ChatAgent(
47
+ system_message=actor_sys_msg,
48
+ model=actor_model,
49
+ message_window_size=10,
50
+ )
51
+
52
+ ckpt = pkl.load(open(f'checkpoints/{args.model_name}_{args.poster_name}_ckpt_{args.index}.pkl', 'rb'))
53
+ logs = ckpt['logs']
54
+ outline = ckpt['outline']
55
+
56
+ sections = list(outline.keys())
57
+ sections = [s for s in sections if s != 'meta']
58
+
59
+ jinja_env = Environment(undefined=StrictUndefined)
60
+
61
+ template = jinja_env.from_string(fill_config["template"])
62
+ content_logs = {}
63
+
64
+ for section_index in range(len(sections)):
65
+ section_name = sections[section_index]
66
+ section_code = logs[section_name][-1]['code']
67
+
68
+ print(f'Filling content for {section_name}')
69
+
70
+ jinja_args = {
71
+ 'content_json': poster_content[section_name],
72
+ 'function_docs': documentation,
73
+ 'existing_code': section_code
74
+ }
75
+
76
+ prompt = template.render(**jinja_args)
77
+ if section_index == 0:
78
+ existing_code = ''
79
+ else:
80
+ existing_code = content_logs[sections[section_index - 1]][-1]['concatenated_code']
81
+ content_logs[section_name] = fill_content(
82
+ actor_agent,
83
+ prompt,
84
+ 3,
85
+ existing_code
86
+ )
87
+
88
+ shutil.copy('poster.pptx', f'tmp/content_poster_<{section_name}>.pptx')
89
+
90
+ if content_logs[section_name][-1]['error'] is not None:
91
+ raise Exception(f'Error in filling content for {section_name}: {content_logs[section_name][-1]["error"]}')
92
+
93
+ total_input_token += content_logs[section_name][-1]['cumulative_tokens'][0]
94
+ total_output_token += content_logs[section_name][-1]['cumulative_tokens'][1]
95
+
96
+ ppt_to_images(f'tmp/content_poster_<{sections[-1]}>.pptx', 'tmp/content_preview')
97
+
98
+ ckpt = {
99
+ 'logs': logs,
100
+ 'content_logs': content_logs,
101
+ 'outline': outline,
102
+ 'total_input_token': total_input_token,
103
+ 'total_output_token': total_output_token
104
+ }
105
+
106
+ pkl.dump(ckpt, open(f'checkpoints/{args.model_name}_{args.poster_name}_content_ckpt_{args.index}.pkl', 'wb'))
107
+
108
+ return total_input_token, total_output_token
109
+
110
+ def stylize_poster(args, actor_config):
111
+ total_input_token, total_output_token = 0, 0
112
+ poster_content = json.load(open(f'contents/{args.model_name}_{args.poster_name}_poster_content_{args.index}.json', 'r'))
113
+ agent_name = 'style_agent'
114
+
115
+ with open(f"prompt_templates/{agent_name}.yaml", "r") as f:
116
+ style_config = yaml.safe_load(f)
117
+
118
+ actor_model = ModelFactory.create(
119
+ model_platform=actor_config['model_platform'],
120
+ model_type=actor_config['model_type'],
121
+ model_config_dict=actor_config['model_config'],
122
+ )
123
+
124
+ actor_sys_msg = style_config['system_prompt']
125
+
126
+ actor_agent = ChatAgent(
127
+ system_message=actor_sys_msg,
128
+ model=actor_model,
129
+ message_window_size=10,
130
+ )
131
+
132
+ ckpt = pkl.load(open(f'checkpoints/{args.model_name}_{args.poster_name}_content_ckpt_{args.index}.pkl', 'rb'))
133
+ content_logs = ckpt['content_logs']
134
+ outline = ckpt['outline']
135
+
136
+ sections = list(outline.keys())
137
+ sections = [s for s in sections if s != 'meta']
138
+
139
+ jinja_env = Environment(undefined=StrictUndefined)
140
+
141
+ template = jinja_env.from_string(style_config["template"])
142
+ style_logs = {}
143
+
144
+ for section_index in range(len(sections)):
145
+ section_name = sections[section_index]
146
+ section_outline = json.dumps(outline[section_name])
147
+ section_code = content_logs[section_name][-1]['code']
148
+
149
+ print(f'Stylizing for {section_name}')
150
+
151
+ img_ratio_json = get_img_ratio_in_section(poster_content[section_name])
152
+
153
+ jinja_args = {
154
+ 'content_json': poster_content[section_name],
155
+ 'function_docs': documentation,
156
+ 'existing_code': section_code,
157
+ 'image_ratio': img_ratio_json,
158
+ }
159
+
160
+ prompt = template.render(**jinja_args)
161
+ if section_index == 0:
162
+ existing_code = ''
163
+ else:
164
+ existing_code = style_logs[sections[section_index - 1]][-1]['concatenated_code']
165
+ style_logs[section_name] = stylize(
166
+ actor_agent,
167
+ prompt,
168
+ args.max_retry,
169
+ existing_code
170
+ )
171
+
172
+ shutil.copy('poster.pptx', f'tmp/style_poster_<{section_name}>.pptx')
173
+
174
+ if style_logs[section_name][-1]['error'] is not None:
175
+ raise Exception(f'Error in stylizing for {section_name}')
176
+
177
+ total_input_token += style_logs[section_name][-1]['cumulative_tokens'][0]
178
+ total_output_token += style_logs[section_name][-1]['cumulative_tokens'][1]
179
+
180
+ ppt_to_images(f'tmp/style_poster_<{sections[-1]}>.pptx', 'tmp/style_preview')
181
+ ckpt = {
182
+ 'logs': ckpt['logs'],
183
+ 'content_logs': content_logs,
184
+ 'style_logs': style_logs,
185
+ 'outline': outline,
186
+ 'total_input_token': total_input_token,
187
+ 'total_output_token': total_output_token
188
+ }
189
+
190
+ with open(f'checkpoints/{args.model_name}_{args.poster_name}_style_ckpt_{args.index}.pkl', 'wb') as f:
191
+ pkl.dump(ckpt, f)
192
+
193
+ return total_input_token, total_output_token
194
+
195
+ if __name__ == '__main__':
196
+ parser = argparse.ArgumentParser()
197
+ parser.add_argument('--poster_name', type=str, default=None)
198
+ parser.add_argument('--model_name', type=str, default='4o')
199
+ parser.add_argument('--poster_path', type=str, required=True)
200
+ parser.add_argument('--index', type=int, default=0)
201
+ parser.add_argument('--max_retry', type=int, default=3)
202
+ args = parser.parse_args()
203
+
204
+ actor_config = get_agent_config(args.model_name)
205
+
206
+ if args.poster_name is None:
207
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
208
+
209
+ fill_total_input_token, fill_total_output_token = fill_poster_content(args, actor_config)
210
+ style_total_input_token, style_total_output_token = stylize_poster(args, actor_config)
211
+
212
+ total_input_token = fill_total_input_token + style_total_input_token
213
+ total_output_token = fill_total_output_token + style_total_output_token
214
+
215
+ print(f'Token consumption: {total_input_token} -> {total_output_token}')
Paper2Poster/PosterAgent/gen_beamer_code.py ADDED
@@ -0,0 +1,299 @@
1
+ import re
2
+ import json
3
+ import os
4
+ from typing import List, Dict, Any
5
+
6
+ def sanitize_for_latex(name):
7
+ """Convert any character that is not alphanumeric into underscore for LaTeX compatibility."""
8
+ return re.sub(r'[^0-9a-zA-Z_]+', '_', name)
9
+
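# Illustrative check of the helper above (editorial note, not part of the module):
#   sanitize_for_latex("Results & Ablations (2024)")  ->  'Results_Ablations_2024_'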
10
+ def initialize_beamer_document(width_cm=120, height_cm=90, theme="default"):
11
+ """
12
+ Initialize a Beamer document with specified dimensions and theme.
13
+
14
+ Args:
15
+ width_cm: Width in centimeters (default 120cm for poster)
16
+ height_cm: Height in centimeters (default 90cm for poster)
17
+ theme: Beamer theme name (default, Madrid, Warsaw, etc.)
18
+ """
19
+ code = f'''\\documentclass[aspectratio=169]{{beamer}}
20
+ \\usepackage[utf8]{{inputenc}}
21
+ \\usepackage[T1]{{fontenc}}
22
+ \\usepackage{{graphicx}}
23
+ \\usepackage{{tikz}}
24
+ \\usepackage{{xcolor}}
25
+ \\usepackage{{geometry}}
26
+ \\usepackage{{multicol}}
27
+ \\usepackage{{array}}
28
+ \\usepackage{{booktabs}}
29
+ \\usepackage{{adjustbox}}
30
+
31
+ % Set page dimensions for poster
32
+ \\geometry{{paperwidth={width_cm}cm, paperheight={height_cm}cm, margin=1cm}}
33
+
34
+ % Beamer theme
35
+ \\usetheme{{{theme}}}
36
+ \\usecolortheme{{default}}
37
+
38
+ % Custom colors
39
+ \\definecolor{{titlecolor}}{{RGB}}{{47, 85, 151}}
40
+ \\definecolor{{textcolor}}{{RGB}}{{0, 0, 0}}
41
+ \\definecolor{{bgcolor}}{{RGB}}{{255, 255, 255}}
42
+
43
+ % Remove navigation symbols
44
+ \\setbeamertemplate{{navigation symbols}}{{}}
45
+
46
+ % Custom title page
47
+ \\setbeamertemplate{{title page}}{{
48
+ \\begin{{center}}
49
+ \\vspace{{1cm}}
50
+ {{\\color{{titlecolor}}\\Huge\\textbf{{\\inserttitle}}}}
51
+ \\vspace{{0.5cm}}
52
+ \\Large{{\\insertauthor}}
53
+ \\vspace{{0.3cm}}
54
+ \\normalsize{{\\insertinstitute}}
55
+ \\end{{center}}
56
+ }}
57
+
58
+ % Custom frame title
59
+ \\setbeamertemplate{{frametitle}}{{
60
+ \\vspace{{0.5cm}}
61
+ \\begin{{flushleft}}
62
+ {{\\color{{titlecolor}}\\Large\\textbf{{\\insertframetitle}}}}
63
+ \\end{{flushleft}}
64
+ \\vspace{{0.3cm}}
65
+ }}
66
+
67
+ \\begin{{document}}
68
+
69
+ % Title frame
70
+ \\title{{POSTER_TITLE_PLACEHOLDER}}
71
+ \\author{{POSTER_AUTHOR_PLACEHOLDER}}
72
+ \\institute{{POSTER_INSTITUTE_PLACEHOLDER}}
73
+ \\date{{\\today}}
74
+
75
+ \\begin{{frame}}[plain]
76
+ \\titlepage
77
+ \\end{{frame}}
78
+
79
+ '''
80
+ return code
81
+
82
+ def generate_beamer_section_code(section_data: Dict[str, Any], section_index: int):
83
+ """
84
+ Compatible with the Paper2Poster bullet JSON:
85
+ - section_data contains title_blocks / textbox1_blocks / textbox2_blocks
86
+ - each *_blocks entry is a list[ {bullet: bool, runs: [{text: str, ...}], ...} ]
87
+ """
88
+ def blocks_to_lines(blocks):
89
+ """把 blocks 转成 list[str],并标注是否 bullet"""
90
+ lines = []
91
+ for blk in blocks or []:
92
+ text = " ".join([r.get("text","") for r in blk.get("runs", [])]).strip()
93
+ if not text:
94
+ continue
95
+ lines.append({
96
+ "text": text,
97
+ "bullet": bool(blk.get("bullet", False))
98
+ })
99
+ return lines
100
+
101
+ # Frame title: prefer the text from title_blocks, fall back to title_str, then to "Untitled"
102
+ if isinstance(section_data.get("title_blocks"), list) and section_data["title_blocks"]:
103
+ frame_title = " ".join([r.get("text","") for r in section_data["title_blocks"][0].get("runs", [])]).strip()
104
+ else:
105
+ frame_title = section_data.get("title_str") or "Untitled"
106
+
107
+ frame_title = frame_title.replace("{","\\{").replace("}","\\}") # simple escaping in case the title contains braces
108
+
109
+ code = f"\n% ===== Section {section_index} =====\n"
110
+ code += f"\\begin{{frame}}[t]{{{frame_title}}}\n"
111
+ code += " \\vspace{-0.5cm}\n"
112
+
113
+ for key in ["textbox1_blocks", "textbox2_blocks"]:
114
+ lines = blocks_to_lines(section_data.get(key, []))
115
+ if not lines:
116
+ continue
117
+
118
+ # If every line is a bullet, merge them into one itemize; otherwise handle each line separately
119
+ if all(l["bullet"] for l in lines):
120
+ code += " \\begin{itemize}\n"
121
+ for l in lines:
122
+ code += f" \\item {l['text']}\n"
123
+ code += " \\end{itemize}\n"
124
+ else:
125
+ for l in lines:
126
+ if l["bullet"]:
127
+ code += f" \\begin{{itemize}}\\item {l['text']}\\end{{itemize}}\n"
128
+ else:
129
+ code += f" {l['text']}\\\\\n"
130
+
131
+ code += "\\end{frame}\n\n"
132
+ return code
133
+
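# --- Illustrative example (editorial, not part of the committed module) ---
# A minimal, hypothetical section in the bullet-JSON shape described above:
_demo_section = {
    "title_blocks": [{"runs": [{"text": "Motivation"}]}],
    "textbox1_blocks": [
        {"bullet": True, "runs": [{"text": "Posters are tedious to lay out by hand."}]},
        {"bullet": True, "runs": [{"text": "Beamer gives reproducible, compilable output."}]},
    ],
    "textbox2_blocks": [],
}
_demo_frame = generate_beamer_section_code(_demo_section, section_index=0)
# _demo_frame is a \begin{frame}[t]{Motivation} ... \end{frame} block with one itemize list.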
134
+
135
+
136
+ def generate_beamer_figure_code(figure_data: Dict[str, Any], figure_index: int):
137
+ """
138
+ Generate Beamer code for including figures.
139
+
140
+ Args:
141
+ figure_data: Dictionary containing figure information
142
+ figure_index: Index of the figure
143
+ """
144
+ figure_name = sanitize_for_latex(figure_data.get('figure_name', f'figure_{figure_index}'))
145
+ figure_path = figure_data.get('figure_path', '')
146
+
147
+ # Convert inches to centimeters (1 inch = 2.54 cm)
148
+ width_cm = figure_data.get('width', 10) * 2.54
149
+ height_cm = figure_data.get('height', 8) * 2.54
150
+
151
+ code = f'''
152
+ % Figure: {figure_name}
153
+ \\begin{{frame}}[t]{{{figure_data.get('title', 'Figure')}}}
154
+ \\vspace{{-0.5cm}}
155
+ \\begin{{center}}
156
+ \\includegraphics[width={width_cm:.2f}cm, height={height_cm:.2f}cm]{{{figure_path}}}
157
+ \\end{{center}}
158
+ \\vspace{{0.3cm}}
159
+ \\begin{{center}}
160
+ \\small{{\\textbf{{{figure_data.get('caption', 'Figure Caption')}}}}}
161
+ \\end{{center}}
162
+ \\end{{frame}}
163
+
164
+ '''
165
+ return code
166
+
167
+ def generate_beamer_poster_code(
168
+ sections: List[Dict[str, Any]],
169
+ figures: List[Dict[str, Any]],
170
+ poster_info: Dict[str, str],
171
+ width_cm: float = 120,
172
+ height_cm: float = 90,
173
+ theme: str = "default",
174
+ output_path: str = "poster.tex"
175
+ ):
176
+ """
177
+ Generate complete Beamer poster code.
178
+
179
+ Args:
180
+ sections: List of section dictionaries
181
+ figures: List of figure dictionaries
182
+ poster_info: Dictionary with title, author, institute
183
+ width_cm: Poster width in centimeters
184
+ height_cm: Poster height in centimeters
185
+ theme: Beamer theme name
186
+ output_path: Output .tex file path
187
+ """
188
+ code = initialize_beamer_document(width_cm, height_cm, theme)
189
+
190
+ # Replace placeholders with actual content
191
+ code = code.replace('POSTER_TITLE_PLACEHOLDER', poster_info.get('title', 'Poster Title'))
192
+ code = code.replace('POSTER_AUTHOR_PLACEHOLDER', poster_info.get('author', 'Author Name'))
193
+ code = code.replace('POSTER_INSTITUTE_PLACEHOLDER', poster_info.get('institute', 'Institute Name'))
194
+
195
+ # Add sections
196
+ for i, section in enumerate(sections):
197
+ code += generate_beamer_section_code(section, i)
198
+
199
+ # Add figures
200
+ for i, figure in enumerate(figures):
201
+ code += generate_beamer_figure_code(figure, i)
202
+
203
+ # Close document
204
+ code += '''
205
+ \\end{document}
206
+ '''
207
+
208
+ return code
209
+
210
+ def save_beamer_code(code: str, output_path: str):
211
+ """Save Beamer code to file."""
212
+ with open(output_path, 'w', encoding='utf-8') as f:
213
+ f.write(code)
214
+
215
+ def compile_beamer_to_pdf(tex_path: str, output_dir: str = "."):
216
+ """
217
+ Compile Beamer .tex file to PDF using pdflatex.
218
+
219
+ Args:
220
+ tex_path: Path to .tex file
221
+ output_dir: Output directory for PDF
222
+ """
223
+ import subprocess
224
+
225
+ try:
226
+ # Run pdflatex twice for proper cross-references
227
+ result1 = subprocess.run(
228
+ ['pdflatex', '-output-directory', output_dir, tex_path],
229
+ capture_output=True,
230
+ text=True,
231
+ timeout=60
232
+ )
233
+
234
+ result2 = subprocess.run(
235
+ ['pdflatex', '-output-directory', output_dir, tex_path],
236
+ capture_output=True,
237
+ text=True,
238
+ timeout=60
239
+ )
240
+
241
+ if result1.returncode == 0 and result2.returncode == 0:
242
+ print(f"Successfully compiled {tex_path} to PDF")
243
+ return True
244
+ else:
245
+ print(f"Error compiling {tex_path}:")
246
+ print(result1.stderr)
247
+ print(result2.stderr)
248
+ return False
249
+
250
+ except subprocess.TimeoutExpired:
251
+ print(f"Timeout while compiling {tex_path}")
252
+ return False
253
+ except Exception as e:
254
+ print(f"Error compiling {tex_path}: {e}")
255
+ return False
256
+
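# --- Illustrative end-to-end sketch for the helpers above (editorial, hypothetical data) ---
# Builds a one-section poster, saves it, and only attempts compilation if pdflatex is available.
import shutil as _shutil

_demo_code = generate_beamer_poster_code(
    sections=[{"title_str": "Overview",
               "textbox1_blocks": [{"bullet": True, "runs": [{"text": "Key idea in one line."}]}]}],
    figures=[],
    poster_info={"title": "Demo", "author": "A. Author", "institute": "Some Lab"},
    width_cm=120, height_cm=90, theme="default",
)
save_beamer_code(_demo_code, "demo_poster.tex")
if _shutil.which("pdflatex"):
    compile_beamer_to_pdf("demo_poster.tex", ".")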
257
+ # Example usage functions
258
+ def convert_pptx_layout_to_beamer(pptx_layout_data: Dict[str, Any]) -> Dict[str, Any]:
259
+ """
260
+ Convert PowerPoint layout data to Beamer-compatible format.
261
+
262
+ Args:
263
+ pptx_layout_data: Layout data from PowerPoint generation
264
+ """
265
+ beamer_data = {
266
+ 'sections': [],
267
+ 'figures': [],
268
+ 'poster_info': {
269
+ 'title': 'Default Title',
270
+ 'author': 'Default Author',
271
+ 'institute': 'Default Institute'
272
+ }
273
+ }
274
+
275
+ # Convert text arrangements to sections
276
+ if 'text_arrangement' in pptx_layout_data:
277
+ for i, text_item in enumerate(pptx_layout_data['text_arrangement']):
278
+ section = {
279
+ 'section_name': text_item.get('textbox_name', f'section_{i}'),
280
+ 'title': text_item.get('title', f'Section {i+1}'),
281
+ 'content': text_item.get('content', 'Content placeholder')
282
+ }
283
+ beamer_data['sections'].append(section)
284
+
285
+ # Convert figure arrangements to figures
286
+ if 'figure_arrangement' in pptx_layout_data:
287
+ for i, figure_item in enumerate(pptx_layout_data['figure_arrangement']):
288
+ figure = {
289
+ 'figure_name': figure_item.get('figure_name', f'figure_{i}'),
290
+ 'figure_path': figure_item.get('figure_path', ''),
291
+ 'width': figure_item.get('width', 10),
292
+ 'height': figure_item.get('height', 8),
293
+ 'title': figure_item.get('title', f'Figure {i+1}'),
294
+ 'caption': figure_item.get('caption', 'Figure caption')
295
+ }
296
+ beamer_data['figures'].append(figure)
297
+
298
+ return beamer_data
299
+
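# --- Illustrative conversion example (editorial, hypothetical layout data) ---
_demo_layout = {
    "text_arrangement": [
        {"textbox_name": "intro", "title": "Introduction", "content": "One-line summary."}
    ],
    "figure_arrangement": [
        {"figure_name": "arch", "figure_path": "figures/arch.png",
         "width": 10, "height": 8, "title": "Architecture", "caption": "System overview."}
    ],
}
_demo_beamer = convert_pptx_layout_to_beamer(_demo_layout)
# _demo_beamer["sections"][0]["title"] == "Introduction"
# _demo_beamer["figures"][0]["figure_path"] == "figures/arch.png"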
Paper2Poster/PosterAgent/gen_outline_layout.py ADDED
@@ -0,0 +1,851 @@
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+ import json
4
+ import copy
5
+ import yaml
6
+ from jinja2 import Environment, StrictUndefined
7
+
8
+ from utils.src.utils import ppt_to_images, get_json_from_response
9
+
10
+ from camel.models import ModelFactory
11
+ from camel.agents import ChatAgent
12
+ from camel.messages import BaseMessage
13
+
14
+ from utils.pptx_utils import *
15
+ from utils.wei_utils import *
16
+
17
+ import pickle as pkl
18
+ import argparse
19
+
20
+ load_dotenv()
21
+
22
+ IMAGE_SCALE_RATIO_MIN = 50
23
+ IMAGE_SCALE_RATIO_MAX = 40
24
+ TABLE_SCALE_RATIO_MIN = 100
25
+ TABLE_SCALE_RATIO_MAX = 80
26
+
27
+ def compute_tp(raw_content_json):
28
+ total_length = 0
29
+ for section in raw_content_json['sections']:
30
+ total_length += len(section['content'])
31
+
32
+ for i in range(len(raw_content_json['sections'])):
33
+ raw_content_json['sections'][i]['tp'] = len(raw_content_json['sections'][i]['content']) / total_length
34
+ raw_content_json['sections'][i]['text_len'] = len(raw_content_json['sections'][i]['content'])
35
+
36
+ def compute_gp(table_info, image_info):
37
+ total_area = 0
38
+ for k, v in table_info.items():
39
+ total_area += v['figure_size']
40
+
41
+ for k, v in image_info.items():
42
+ total_area += v['figure_size']
43
+
44
+ for k, v in table_info.items():
45
+ v['gp'] = v['figure_size'] / total_area
46
+
47
+ for k, v in image_info.items():
48
+ v['gp'] = v['figure_size'] / total_area
49
+
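# --- Illustrative numbers for the two helpers above (editorial, hypothetical data) ---
# compute_tp assigns each section a text proportion 'tp' of the total character count;
# compute_gp assigns each table/image a graphics proportion 'gp' of the total figure area.
_demo_content = {"sections": [{"content": "a" * 300}, {"content": "b" * 100}]}
compute_tp(_demo_content)                      # tp -> 0.75 and 0.25
_demo_tables = {"t1": {"figure_size": 40}}
_demo_images = {"i1": {"figure_size": 60}}
compute_gp(_demo_tables, _demo_images)         # gp -> 0.4 and 0.6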
50
+ def get_outline_location(outline, subsection=False):
51
+ outline_location = {}
52
+ for k, v in outline.items():
53
+ if k == 'meta':
54
+ continue
55
+ outline_location[k] = {
56
+ 'location': v['location'],
57
+ }
58
+ if subsection:
59
+ if 'subsections' in v:
60
+ outline_location[k]['subsections'] = get_outline_location(v['subsections'])
61
+ return outline_location
62
+
63
+ def apply_outline_location(outline, location, subsection=False):
64
+ new_outline = {}
65
+ for k, v in outline.items():
66
+ if k == 'meta':
67
+ new_outline[k] = v
68
+ continue
69
+ new_outline[k] = copy.deepcopy(v)
70
+ new_outline[k]['location'] = location[k]['location']
71
+ if subsection:
72
+ if 'subsections' in v:
73
+ new_outline[k]['subsections'] = apply_outline_location(v['subsections'], location[k]['subsections'])
74
+
75
+ return new_outline
76
+
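# --- Round-trip sketch for the two helpers above (editorial, hypothetical outline) ---
_demo_outline = {
    "meta": {"width": 48, "height": 36},
    "Introduction": {"name": "Introduction",
                     "location": {"left": 0, "top": 0, "width": 24, "height": 12}},
}
_demo_locations = get_outline_location(_demo_outline)
_demo_locations["Introduction"]["location"]["width"] = 20   # e.g. a layout adjustment
_demo_outline = apply_outline_location(_demo_outline, _demo_locations)
# _demo_outline["Introduction"]["location"]["width"] == 20; 'meta' is passed through untouched.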
77
+ def fill_location(outline, section_name, location_dict):
78
+ new_outline = copy.deepcopy(outline)
79
+ if 'subsections' not in new_outline[section_name]:
80
+ return new_outline
81
+ for k, v in new_outline[section_name]['subsections'].items():
82
+ v['location'] = location_dict[k]['location']
83
+ return new_outline
84
+
85
+ def recover_name_and_location(outline_no_name, outline):
86
+ new_outline = copy.deepcopy(outline_no_name)
87
+ for k, v in outline_no_name.items():
88
+ if k == 'meta':
89
+ continue
90
+ new_outline[k]['name'] = outline[k]['name']
91
+ if type(new_outline[k]['location']) == list:
92
+ new_outline[k]['location'] = {
93
+ 'left': v['location'][0],
94
+ 'top': v['location'][1],
95
+ 'width': v['location'][2],
96
+ 'height': v['location'][3]
97
+ }
98
+ if 'subsections' in v:
99
+ for k_sub, v_sub in v['subsections'].items():
100
+ new_outline[k]['subsections'][k_sub]['name'] = outline[k]['subsections'][k_sub]['name']
101
+ if type(new_outline[k]['subsections'][k_sub]['location']) == list:
102
+ new_outline[k]['subsections'][k_sub]['location'] = {
103
+ 'left': v_sub['location'][0],
104
+ 'top': v_sub['location'][1],
105
+ 'width': v_sub['location'][2],
106
+ 'height': v_sub['location'][3]
107
+ }
108
+ return new_outline
109
+
110
+
111
+ def validate_and_adjust_subsections(section_bbox, subsection_bboxes):
112
+ """
113
+ Validate that the given subsections collectively occupy the entire section.
114
+ If not, return an adjusted version that fixes the layout.
115
+
116
+ We assume all subsections are intended to be stacked vertically with no gaps,
117
+ spanning the full width of the section.
118
+
119
+ :param section_bbox: dict with keys ["left", "top", "width", "height"]
120
+ :param subsection_bboxes: dict of subsection_name -> bounding_box (each also
121
+ with keys ["left", "top", "width", "height"])
122
+ :return: (is_valid, revised_subsections)
123
+ where is_valid is True/False,
124
+ and revised_subsections is either the same as subsection_bboxes if valid,
125
+ or a new dict of adjusted bounding boxes if invalid.
126
+ """
127
+
128
+ # Helper functions
129
+ def _right(bbox):
130
+ return bbox["left"] + bbox["width"]
131
+
132
+ def _bottom(bbox):
133
+ return bbox["top"] + bbox["height"]
134
+
135
+ section_left = section_bbox["left"]
136
+ section_top = section_bbox["top"]
137
+ section_right = section_left + section_bbox["width"]
138
+ section_bottom = section_top + section_bbox["height"]
139
+
140
+ # Convert dictionary to a list of (subsection_name, bbox) pairs
141
+ items = list(subsection_bboxes.items())
142
+ if not items:
143
+ # No subsections is definitely not valid if we want to fill the section
144
+ return False, None
145
+
146
+ # Sort subsections by their 'top' coordinate
147
+ items_sorted = sorted(items, key=lambda x: x[1]["top"])
148
+
149
+ # ---------------------------
150
+ # Step 1: Validate
151
+ # ---------------------------
152
+ # We'll check:
153
+ # 1. left/right boundaries match the section for each subsection
154
+ # 2. The first subsection's top == section_top
155
+ # 3. The last subsection's bottom == section_bottom
156
+ # 4. Each pair of consecutive subsections lines up exactly
157
+ # (previous bottom == current top) with no gap or overlap.
158
+
159
+ is_valid = True
160
+
161
+ # Check left/right for each
162
+ for name, bbox in items_sorted:
163
+ if bbox["left"] != section_left or _right(bbox) != section_right:
164
+ is_valid = False
165
+ break
166
+
167
+ # Check alignment for the first and last
168
+ if is_valid:
169
+ first_sub_name, first_sub_bbox = items_sorted[0]
170
+ if first_sub_bbox["top"] != section_top:
171
+ is_valid = False
172
+
173
+ if is_valid:
174
+ last_sub_name, last_sub_bbox = items_sorted[-1]
175
+ if _bottom(last_sub_bbox) != section_bottom:
176
+ is_valid = False
177
+
178
+ # Check consecutive alignment
179
+ if is_valid:
180
+ for i in range(len(items_sorted) - 1):
181
+ _, current_bbox = items_sorted[i]
182
+ _, next_bbox = items_sorted[i + 1]
183
+ if _bottom(current_bbox) != next_bbox["top"]:
184
+ is_valid = False
185
+ break
186
+
187
+ # If everything passed, we return
188
+ if is_valid:
189
+ return True, subsection_bboxes
190
+
191
+ # ---------------------------
192
+ # Step 2: Revise
193
+ # ---------------------------
194
+ # We will adjust all subsection bboxes so that they occupy
195
+ # the entire section exactly, preserving each original bbox's
196
+ # height *ratio* if possible.
197
+
198
+ # 2a. Compute total original height (in the order of sorted items)
199
+ original_heights = [bbox["height"] for _, bbox in items_sorted]
200
+ total_original_height = sum(original_heights)
201
+
202
+ # Avoid divide-by-zero if somehow there's a 0 height
203
+ if total_original_height <= 0:
204
+ # Fallback: split the section equally among subsections
205
+ # to avoid zero or negative heights
206
+ chunk_height = section_bbox["height"] / len(items_sorted)
207
+ scale_heights = [chunk_height] * len(items_sorted)
208
+ else:
209
+ # Scale each original height by the ratio of
210
+ # (section total height / sum of original heights)
211
+ scale = section_bbox["height"] / total_original_height
212
+ scale_heights = [h * scale for h in original_heights]
213
+
214
+ # 2b. Assign bounding boxes top->bottom, ensuring no gap
215
+ revised = {}
216
+ current_top = section_top
217
+ for i, (name, original_bbox) in enumerate(items_sorted):
218
+ revised_height = scale_heights[i]
219
+ # If there's floating error, we can clamp in the last iteration
220
+ # so that the bottom exactly matches section_bottom.
221
+ # But for simplicity, we'll keep it straightforward unless needed.
222
+
223
+ revised[name] = {
224
+ "left": section_left,
225
+ "top": current_top,
226
+ "width": section_bbox["width"],
227
+ "height": revised_height
228
+ }
229
+ # Update current_top for next subsection
230
+ current_top += revised_height
231
+
232
+ # Due to potential float rounding, we can enforce the last subsection
233
+ # to exactly end at section_bottom:
234
+ last_name = items_sorted[-1][0]
235
+ # Recompute the actual bottom after the above assignment
236
+ new_bottom = revised[last_name]["top"] + revised[last_name]["height"]
237
+ diff = new_bottom - section_bottom
238
+ if abs(diff) > 1e-9:
239
+ # Adjust the last subsection's height
240
+ revised[last_name]["height"] -= diff
241
+
242
+ # Return the revised dictionary
243
+ return False, revised
244
+
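A minimal usage sketch of validate_and_adjust_subsections, with illustrative coordinates: two stacked subsections that stop short of the section bottom are rescaled so they tile the section exactly.

    section = {'left': 0, 'top': 10, 'width': 20, 'height': 30}
    subs = {
        'A': {'left': 0, 'top': 10, 'width': 20, 'height': 10},
        'B': {'left': 0, 'top': 20, 'width': 20, 'height': 15},  # ends 5 units above the section bottom
    }
    ok, fixed = validate_and_adjust_subsections(section, subs)
    # ok is False; the heights are rescaled by 30/25, so 'A' becomes 12 and 'B' becomes 18,
    # and the revised boxes span tops 10..22 and 22..40, filling the section exactly.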
245
+ def filter_image_table(args, filter_config):
246
+ images = json.load(open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_images.json', 'r'))
247
+ tables = json.load(open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_tables.json', 'r'))
248
+ doc_json = json.load(open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_raw_content.json', 'r'))
249
+ agent_filter = 'image_table_filter_agent'
250
+ with open(f"utils/prompt_templates/{agent_filter}.yaml", "r", encoding="utf-8") as f:
251
+ config_filter = yaml.safe_load(f)
252
+
253
+ image_information = {}
254
+ for k, v in images.items():
255
+ image_information[k] = copy.deepcopy(v)
256
+ image_information[k]['min_width'] = v['width'] // IMAGE_SCALE_RATIO_MIN
257
+ image_information[k]['min_height'] = v['height'] // IMAGE_SCALE_RATIO_MIN
258
+ image_information[k]['max_width'] = v['width'] // IMAGE_SCALE_RATIO_MAX
259
+ image_information[k]['max_height'] = v['height'] // IMAGE_SCALE_RATIO_MAX
260
+
261
+ table_information = {}
262
+ for k, v in tables.items():
263
+ table_information[k] = copy.deepcopy(v)
264
+ table_information[k]['min_width'] = v['width'] // TABLE_SCALE_RATIO_MIN
265
+ table_information[k]['min_height'] = v['height'] // TABLE_SCALE_RATIO_MIN
266
+ table_information[k]['max_width'] = v['width'] // TABLE_SCALE_RATIO_MAX
267
+ table_information[k]['max_height'] = v['height'] // TABLE_SCALE_RATIO_MAX
268
+
269
+ filter_actor_sys_msg = config_filter['system_prompt']
270
+
271
+ if args.model_name_t.startswith('vllm_qwen'):
272
+ filter_model = ModelFactory.create(
273
+ model_platform=filter_config['model_platform'],
274
+ model_type=filter_config['model_type'],
275
+ model_config_dict=filter_config['model_config'],
276
+ url=filter_config['url'],
277
+ )
278
+ else:
279
+ filter_model = ModelFactory.create(
280
+ model_platform=filter_config['model_platform'],
281
+ model_type=filter_config['model_type'],
282
+ model_config_dict=filter_config['model_config'],
283
+ )
284
+
285
+ filter_actor_agent = ChatAgent(
286
+ system_message=filter_actor_sys_msg,
287
+ model=filter_model,
288
+ message_window_size=10,
289
+ )
290
+
291
+ filter_jinja_args = {
292
+ 'json_content': doc_json,
293
+ 'table_information': json.dumps(table_information, indent=4),
294
+ 'image_information': json.dumps(image_information, indent=4),
295
+ }
296
+ jinja_env = Environment(undefined=StrictUndefined)
297
+ filter_prompt = jinja_env.from_string(config_filter["template"])
298
+ filter_actor_agent.reset()
299
+ response = filter_actor_agent.step(filter_prompt.render(**filter_jinja_args))
300
+ input_token, output_token = account_token(response)
301
+ response_json = get_json_from_response(response.msgs[0].content)
302
+ table_information = response_json['table_information']
303
+ image_information = response_json['image_information']
304
+ json.dump(images, open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_images_filtered.json', 'w'), indent=4)
305
+ json.dump(tables, open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_tables_filtered.json', 'w'), indent=4)
306
+
307
+ return input_token, output_token
308
+
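Note that the '_MIN' scale ratios are the larger divisors, so they yield the smaller rendered size; a quick check with round numbers (width and height are illustrative):

    # With IMAGE_SCALE_RATIO_MIN = 50 and IMAGE_SCALE_RATIO_MAX = 40:
    width, height = 2000, 1000
    min_width, min_height = width // 50, height // 50   # 40, 20  -> smallest allowed rendering
    max_width, max_height = width // 40, height // 40   # 50, 25  -> largest allowed rendering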
309
+ def gen_outline_layout_v2(args, actor_config):
310
+ total_input_token, total_output_token = 0, 0
311
+ agent_name = 'poster_planner_new_v2'
312
+ doc_json = json.load(open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_raw_content.json', 'r'))
313
+ filtered_table_information = json.load(open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_tables_filtered.json', 'r'))
314
+ filtered_image_information = json.load(open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_images_filtered.json', 'r'))
315
+
316
+ filtered_table_information_captions = {}
317
+ filtered_image_information_captions = {}
318
+
319
+ for k, v in filtered_table_information.items():
320
+ filtered_table_information_captions[k] = {
321
+ 'caption': v['caption']
322
+ }
323
+
324
+ for k, v in filtered_image_information.items():
325
+ filtered_image_information_captions[k] = {
326
+ 'caption': v['caption']
327
+ }
328
+
329
+ with open(f"utils/prompt_templates/{agent_name}.yaml", "r", encoding="utf-8") as f:
330
+ planner_config = yaml.safe_load(f)
331
+
332
+ compute_tp(doc_json)
333
+
334
+ jinja_env = Environment(undefined=StrictUndefined)
335
+ outline_template = jinja_env.from_string(planner_config["template"])
336
+ planner_jinja_args = {
337
+ 'json_content': doc_json,
338
+ 'table_information': filtered_table_information_captions,
339
+ 'image_information': filtered_image_information_captions,
340
+ }
341
+
342
+ if args.model_name_t.startswith('vllm_qwen'):
343
+ planner_model = ModelFactory.create(
344
+ model_platform=actor_config['model_platform'],
345
+ model_type=actor_config['model_type'],
346
+ model_config_dict=actor_config['model_config'],
347
+ url=actor_config['url'],
348
+ )
349
+ else:
350
+ planner_model = ModelFactory.create(
351
+ model_platform=actor_config['model_platform'],
352
+ model_type=actor_config['model_type'],
353
+ model_config_dict=actor_config['model_config'],
354
+ )
355
+
356
+
357
+ planner_agent = ChatAgent(
358
+ system_message=planner_config['system_prompt'],
359
+ model=planner_model,
360
+ message_window_size=10,
361
+ )
362
+
363
+ print(f'Generating outline...')
364
+ planner_prompt = outline_template.render(**planner_jinja_args)
365
+ planner_agent.reset()
366
+ response = planner_agent.step(planner_prompt)
367
+ input_token, output_token = account_token(response)
368
+ total_input_token += input_token
369
+ total_output_token += output_token
370
+
371
+ figure_arrangement = get_json_from_response(response.msgs[0].content)
372
+
373
+ print(f'Figure arrangement: {json.dumps(figure_arrangement, indent=4)}')
374
+
375
+ arranged_images = {}
376
+ arranged_tables = {}
377
+ assigned_images = set()
378
+ assigned_tables = set()
379
+
380
+ for section_name, figure in figure_arrangement.items():
381
+ if 'image' in figure:
382
+ image_id = str(figure['image'])
383
+ if image_id in assigned_images:
384
+ continue
385
+ if image_id in filtered_image_information:
386
+ arranged_images[image_id] = filtered_image_information[image_id]
387
+ assigned_images.add(image_id)
388
+ if 'table' in figure:
389
+ table_id = str(figure['table'])
390
+ if table_id in assigned_tables:
391
+ continue
392
+ if table_id in filtered_table_information:
393
+ arranged_tables[table_id] = filtered_table_information[table_id]
394
+ assigned_tables.add(table_id)
395
+
396
+ compute_gp(arranged_tables, arranged_images)
397
+
398
+ # Obtain panel input
399
+ paper_panels = []
400
+ for i in range(len(doc_json['sections'])):
401
+ section = doc_json['sections'][i]
402
+ panel = {}
403
+ panel['panel_id'] = i
404
+ panel['section_name'] = section['title']
405
+ panel['tp'] = section['tp']
406
+ panel['text_len'] = section['text_len']
407
+ panel['gp'] = 0
408
+ panel['figure_size'] = 0
409
+ panel['figure_aspect'] = 1
410
+ if section['title'] in figure_arrangement:
411
+ curr_arrangement = figure_arrangement[section['title']]
412
+ if 'table' in curr_arrangement:
413
+ table_id = str(curr_arrangement['table'])
414
+ if table_id in arranged_tables:
415
+ panel['gp'] = arranged_tables[table_id]['gp']
416
+ panel['figure_size'] = arranged_tables[table_id]['figure_size']
417
+ panel['figure_aspect'] = arranged_tables[table_id]['figure_aspect']
418
+ elif 'image' in curr_arrangement:
419
+ image_id = str(curr_arrangement['image'])
420
+ if image_id in arranged_images:
421
+ panel['gp'] = arranged_images[image_id]['gp']
422
+ panel['figure_size'] = arranged_images[image_id]['figure_size']
423
+ panel['figure_aspect'] = arranged_images[image_id]['figure_aspect']
424
+
425
+ paper_panels.append(panel)
426
+
427
+ return total_input_token, total_output_token, paper_panels, figure_arrangement
428
+
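For reference, one entry of the returned paper_panels list would look roughly like the sketch below; 'tp' and 'text_len' are filled in by compute_tp, and the gp/figure fields keep their defaults when no figure is assigned to the section (all values here are illustrative):

    panel = {
        'panel_id': 2,
        'section_name': 'Method',
        'tp': 0.18,            # presumably the section's text proportion, from compute_tp
        'text_len': 1250,
        'gp': 0.4,             # graphical proportion of the assigned figure, 0 if none
        'figure_size': 300000,
        'figure_aspect': 1.6,
    }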
429
+ def gen_outline_layout(args, actor_config, critic_config):
430
+ poster_log_path = f'log/{args.model_name}_{args.poster_name}_poster_{args.index}'
431
+ if not os.path.exists(poster_log_path):
432
+ os.mkdir(poster_log_path)
433
+ total_input_token, total_output_token = 0, 0
434
+ consumption_log = {
435
+ 'outline': [],
436
+ 'h1_actor': [],
437
+ 'h2_actor': [],
438
+ 'h1_critic': [],
439
+ 'gen_layout': []
440
+ }
441
+ jinja_env = Environment(undefined=StrictUndefined)
442
+ outline_file_path = f'outlines/{args.model_name}_{args.poster_name}_outline_{args.index}.json'
443
+ agent_name = 'poster_planner_new'
444
+ agent_init_name = 'layout_agent_init'
445
+ agent_new_section_name = 'layout_agent_new_section'
446
+ h1_critic_name = 'critic_layout_hierarchy_1'
447
+ h2_actor_name = 'actor_layout_hierarchy_2'
448
+
449
+ doc_json = json.load(open(f'contents/{args.model_name}_{args.poster_name}_raw_content.json', 'r'))
450
+ filtered_table_information = json.load(open(f'images_and_tables/{args.poster_name}_tables_filtered.json', 'r'))
451
+ filtered_image_information = json.load(open(f'images_and_tables/{args.poster_name}_images_filtered.json', 'r'))
452
+
453
+ with open(f"utils/prompt_templates/{agent_name}.yaml", "r", encoding="utf-8") as f:
454
+ planner_config = yaml.safe_load(f)
455
+
456
+ with open(f"utils/prompt_templates/{agent_init_name}.yaml", "r", encoding="utf-8") as f:
457
+ config_init = yaml.safe_load(f)
458
+
459
+ with open(f"utils/prompt_templates/{agent_new_section_name}.yaml", "r", encoding="utf-8") as f:
460
+ config_new_section = yaml.safe_load(f)
461
+
462
+ with open(f"utils/prompt_templates/{h1_critic_name}.yaml", "r", encoding="utf-8") as f:
463
+ config_h1_critic = yaml.safe_load(f)
464
+
465
+ with open(f"utils/prompt_templates/{h2_actor_name}.yaml", "r", encoding="utf-8") as f:
466
+ config_h2_actor = yaml.safe_load(f)
467
+
468
+ planner_model = ModelFactory.create(
469
+ model_platform=actor_config['model_platform'],
470
+ model_type=actor_config['model_type'],
471
+ model_config_dict=actor_config['model_config'],
472
+ )
473
+
474
+ planner_agent = ChatAgent(
475
+ system_message=planner_config['system_prompt'],
476
+ model=planner_model,
477
+ message_window_size=10,
478
+ )
479
+
480
+ outline_template = jinja_env.from_string(planner_config["template"])
481
+
482
+ planner_jinja_args = {
483
+ 'json_content': doc_json,
484
+ 'table_information': filtered_table_information,
485
+ 'image_information': filtered_image_information,
486
+ }
487
+
488
+ actor_model = ModelFactory.create(
489
+ model_platform=actor_config['model_platform'],
490
+ model_type=actor_config['model_type'],
491
+ model_config_dict=actor_config['model_config'],
492
+ )
493
+
494
+ init_actor_sys_msg = config_init['system_prompt']
495
+
496
+ init_actor_agent = ChatAgent(
497
+ system_message=init_actor_sys_msg,
498
+ model=actor_model,
499
+ message_window_size=10,
500
+ )
501
+
502
+ new_section_actor_sys_msg = config_new_section['system_prompt']
503
+ new_section_actor_agent = ChatAgent(
504
+ system_message=new_section_actor_sys_msg,
505
+ model=actor_model,
506
+ message_window_size=10,
507
+ )
508
+
509
+ h1_critic_model = ModelFactory.create(
510
+ model_platform=critic_config['model_platform'],
511
+ model_type=critic_config['model_type'],
512
+ model_config_dict=critic_config['model_config'],
513
+ )
514
+
515
+ h1_critic_sys_msg = config_h1_critic['system_prompt']
516
+
517
+ h1_critic_agent = ChatAgent(
518
+ system_message=h1_critic_sys_msg,
519
+ model=h1_critic_model,
520
+ message_window_size=None,
521
+ )
522
+
523
+ h1_pos_example = Image.open('assets/h1_example/h1_pos.jpg')
524
+ h1_neg_example = Image.open('assets/h1_example/h1_neg.jpg')
525
+
526
+ h2_actor_model = ModelFactory.create(
527
+ model_platform=actor_config['model_platform'],
528
+ model_type=actor_config['model_type'],
529
+ model_config_dict=actor_config['model_config'],
530
+ )
531
+
532
+ h2_actor_sys_msg = config_h2_actor['system_prompt']
533
+
534
+ h2_actor_agent = ChatAgent(
535
+ system_message=h2_actor_sys_msg,
536
+ model=h2_actor_model,
537
+ message_window_size=10,
538
+ )
539
+
540
+ attempt = 0
541
+ while True:
542
+ print(f'Generating outline attempt {attempt}...')
543
+ planner_prompt = outline_template.render(**planner_jinja_args)
544
+ planner_agent.reset()
545
+ response = planner_agent.step(planner_prompt)
546
+ input_token, output_token = account_token(response)
547
+ consumption_log['outline'].append((input_token, output_token))
548
+ total_input_token += input_token
549
+ total_output_token += output_token
550
+
551
+ outline = get_json_from_response(response.msgs[0].content)
552
+ name_to_hierarchy = get_hierarchy(outline)
553
+
554
+ sections = list(outline.keys())
555
+ sections = [x for x in sections if x != 'meta']
556
+ init_template = jinja_env.from_string(config_init["template"])
557
+ new_section_template = jinja_env.from_string(config_new_section["template"])
558
+ h1_critic_template = jinja_env.from_string(config_h1_critic["template"])
559
+ init_outline = {'meta': outline['meta'], sections[0]: outline[sections[0]]}
560
+
561
+ new_outline = outline
562
+
563
+ init_jinja_args = {
564
+ 'json_outline': init_outline,
565
+ 'function_docs': documentation
566
+ }
567
+
568
+ init_prompt = init_template.render(**init_jinja_args)
569
+
570
+ # hierarchy 1 only
571
+ outline_location = get_outline_location(outline, subsection=False)
572
+ logs = {}
573
+ curr_section = sections[0]
574
+
575
+ layout_cumulative_input_token = 0
576
+ layout_cumulative_output_token = 0
577
+
578
+ print('Generating h1 layout...\n')
579
+ print(f'Generating h1 layout for section {curr_section}...')
580
+ logs[curr_section] = gen_layout(
581
+ init_actor_agent,
582
+ init_prompt,
583
+ args.max_retry,
584
+ name_to_hierarchy,
585
+ visual_identifier=curr_section
586
+ )
587
+
588
+ if logs[curr_section][-1]['error'] is not None:
589
+ raise ValueError(f'Failed to generate layout for section {curr_section}.')
590
+
591
+ layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
592
+ layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
593
+
594
+ for section_index in range(1, len(sections)):
595
+ curr_section = sections[section_index]
596
+ print(f'generating h1 layout for section {curr_section}...')
597
+ new_section_outline = {curr_section: new_outline[curr_section]}
598
+ new_section_jinja_args = {
599
+ 'json_outline': new_section_outline,
600
+ 'function_docs': documentation
601
+ }
602
+ new_section_prompt = new_section_template.render(**new_section_jinja_args)
603
+
604
+ logs[curr_section] = gen_layout(
605
+ new_section_actor_agent,
606
+ new_section_prompt,
607
+ args.max_retry,
608
+ name_to_hierarchy,
609
+ visual_identifier=curr_section,
610
+ existing_code = logs[sections[section_index - 1]][-1]['concatenated_code']
611
+ )
612
+ if logs[curr_section][-1]['error'] is not None:
613
+ raise ValueError(f'Failed to generate layout for section {curr_section}.')
614
+
615
+ layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
616
+ layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
617
+
618
+ consumption_log['h1_actor'].append((layout_cumulative_input_token, layout_cumulative_output_token))
619
+ total_input_token += layout_cumulative_input_token
620
+ total_output_token += layout_cumulative_output_token
621
+
622
+ h1_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1.pptx'
623
+ h2_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2.pptx'
624
+
625
+ h1_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1_filled.pptx'
626
+ h2_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2_filled.pptx'
627
+
628
+ ppt_to_images(h1_path, 'tmp/layout_h1')
629
+ ppt_to_images(h2_path, 'tmp/layout_h2')
630
+ ppt_to_images(h1_filled_path, 'tmp/layout_h1_filled')
631
+ ppt_to_images(h2_filled_path, 'tmp/layout_h2_filled')
632
+
633
+ h1_img = Image.open('tmp/layout_h1/slide_0001.jpg')
634
+ h2_img = Image.open('tmp/layout_h2/slide_0001.jpg')
635
+ h1_filled_img = Image.open('tmp/layout_h1_filled/slide_0001.jpg')
636
+ h2_filled_img = Image.open('tmp/layout_h2_filled/slide_0001.jpg')
637
+
638
+ h1_critic_msg = BaseMessage.make_user_message(
639
+ role_name='User',
640
+ content=h1_critic_template.render(),
641
+ image_list=[h1_neg_example, h1_pos_example, h1_filled_img]
642
+ )
643
+
644
+ outline_bbox_dict = {}
645
+ for k, v in outline_location.items():
646
+ outline_bbox_dict[k] = v['location']
647
+
648
+ bbox_check_result = check_bounding_boxes(
649
+ outline_bbox_dict,
650
+ new_outline['meta']['width'],
651
+ new_outline['meta']['height']
652
+ )
653
+
654
+ if len(bbox_check_result) != 0:
655
+ print(bbox_check_result)
656
+ attempt += 1
657
+ continue
658
+
659
+ h1_critic_agent.reset()
660
+ response = h1_critic_agent.step(h1_critic_msg)
661
+ input_token, output_token = account_token(response)
662
+ consumption_log['h1_critic'].append((input_token, output_token))
663
+ total_input_token += input_token
664
+ total_output_token += output_token
665
+ if response.msgs[0].content == 'T':
666
+ print('Blank area detected.')
667
+ attempt += 1
668
+ continue
669
+
670
+ break
671
+
672
+ outline_bbox_dict = {}
673
+ for k, v in outline_location.items():
674
+ outline_bbox_dict[k] = v['location']
675
+
676
+ # Generate subsection locations
677
+ outline_no_sub_locations = copy.deepcopy(new_outline)
678
+ if 'meta' in outline_no_sub_locations:
679
+ outline_no_sub_locations.pop('meta')
680
+
681
+ for k, v in outline_no_sub_locations.items():
682
+ if 'subsections' in v:
683
+ subsections = v['subsections']
684
+ for k_sub, v_sub in subsections.items():
685
+ del v_sub['location']
686
+ del v_sub['name']
687
+
688
+ h2_actor_template = jinja_env.from_string(config_h2_actor["template"])
689
+
690
+ h2_cumulative_input_token = 0
691
+ h2_cumulative_output_token = 0
692
+
693
+ for section in sections:
694
+ while True:
695
+ print(f'generating h2 for section {section}...')
696
+ section_outline = {section: outline_no_sub_locations[section]}
697
+ section_jinja_args = {
698
+ 'section_outline': json.dumps(section_outline, indent=4),
699
+ }
700
+
701
+ section_prompt = h2_actor_template.render(**section_jinja_args)
702
+
703
+ h2_actor_agent.reset()
704
+ response = h2_actor_agent.step(section_prompt)
705
+ input_token, output_token = account_token(response)
706
+ h2_cumulative_input_token += input_token
707
+ h2_cumulative_output_token += output_token
708
+ subsection_location = get_json_from_response(response.msgs[0].content)
709
+
710
+ sec_bbox = outline_no_sub_locations[section]['location']
711
+ subsection_location_dict = {}
712
+ for k, v in subsection_location.items():
713
+ subsection_location_dict[k] = {
714
+ 'left': v['location'][0],
715
+ 'top': v['location'][1],
716
+ 'width': v['location'][2],
717
+ 'height': v['location'][3]
718
+ }
719
+
720
+ is_valid, revised = validate_and_adjust_subsections(sec_bbox, subsection_location_dict)
721
+ if not is_valid:
722
+ is_valid, revised = validate_and_adjust_subsections(sec_bbox, revised)
723
+ assert is_valid, "Failed to adjust subsections to fit section"
724
+ outline_no_sub_locations = fill_location(outline_no_sub_locations, section, revised)
725
+ else:
726
+ outline_no_sub_locations = fill_location(outline_no_sub_locations, section, subsection_location)
727
+ break
728
+
729
+ consumption_log['h2_actor'].append((h2_cumulative_input_token, h2_cumulative_output_token))
730
+ total_input_token += h2_cumulative_input_token
731
+ total_output_token += h2_cumulative_output_token
732
+
733
+ outline_no_sub_locations['meta'] = outline['meta']
734
+ outline_no_sub_locations_with_name = recover_name_and_location(outline_no_sub_locations, new_outline)
735
+ new_outline = outline_no_sub_locations_with_name
736
+
737
+ ### Outline finalized, actually generate layout
738
+
739
+ logs = {}
740
+
741
+ gen_layout_cumulative_input_token = 0
742
+ gen_layout_cumulative_output_token = 0
743
+ curr_section = sections[0]
744
+
745
+ init_outline = {'meta': new_outline['meta'], sections[0]: new_outline[sections[0]]}
746
+
747
+ init_jinja_args = {
748
+ 'json_outline': init_outline,
749
+ 'function_docs': documentation
750
+ }
751
+
752
+ init_prompt = init_template.render(**init_jinja_args)
753
+ logs[curr_section] = gen_layout(
754
+ init_actor_agent,
755
+ init_prompt,
756
+ args.max_retry,
757
+ name_to_hierarchy,
758
+ visual_identifier=curr_section
759
+ )
760
+
761
+ if logs[curr_section][-1]['error'] is not None:
762
+ raise ValueError(f'Failed to generate layout for section {curr_section}.')
763
+
764
+ gen_layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
765
+ gen_layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
766
+
767
+ for section_index in range(1, len(sections)):
768
+ curr_section = sections[section_index]
769
+ print(f'generating section {curr_section}...')
770
+ new_section_outline = {curr_section: new_outline[curr_section]}
771
+ new_section_jinja_args = {
772
+ 'json_outline': new_section_outline,
773
+ 'function_docs': documentation
774
+ }
775
+ new_section_prompt = new_section_template.render(**new_section_jinja_args)
776
+
777
+ logs[curr_section] = gen_layout(
778
+ new_section_actor_agent,
779
+ new_section_prompt,
780
+ args.max_retry,
781
+ name_to_hierarchy,
782
+ visual_identifier=curr_section,
783
+ existing_code = logs[sections[section_index - 1]][-1]['concatenated_code']
784
+ )
785
+ if logs[curr_section][-1]['error'] is not None:
786
+ raise ValueError(f'Failed to generate layout for section {curr_section}.')
787
+
788
+ gen_layout_cumulative_input_token += logs[curr_section][-1]['cumulative_tokens'][0]
789
+ gen_layout_cumulative_output_token += logs[curr_section][-1]['cumulative_tokens'][1]
790
+
791
+ consumption_log['gen_layout'].append((gen_layout_cumulative_input_token, gen_layout_cumulative_output_token))
792
+ total_input_token += gen_layout_cumulative_input_token
793
+ total_output_token += gen_layout_cumulative_output_token
794
+
795
+ h1_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1.pptx'
796
+ h2_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2.pptx'
797
+
798
+ h1_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_1_filled.pptx'
799
+ h2_filled_path = f'tmp/poster_<{sections[-1]}>_hierarchy_2_filled.pptx'
800
+
801
+ ppt_to_images(h1_path, f'{poster_log_path}/layout_h1')
802
+ ppt_to_images(h2_path, f'{poster_log_path}/layout_h2')
803
+ ppt_to_images(h1_filled_path, f'{poster_log_path}/layout_h1_filled')
804
+ ppt_to_images(h2_filled_path, f'{poster_log_path}/layout_h2_filled')
805
+
806
+ h1_img = Image.open(f'{poster_log_path}/layout_h1/slide_0001.jpg')
807
+ h2_img = Image.open(f'{poster_log_path}/layout_h2/slide_0001.jpg')
808
+ h1_filled_img = Image.open(f'{poster_log_path}/layout_h1_filled/slide_0001.jpg')
809
+ h2_filled_img = Image.open(f'{poster_log_path}/layout_h2_filled/slide_0001.jpg')
810
+
811
+ ckpt = {
812
+ 'logs': logs,
813
+ 'outline': new_outline,
814
+ 'name_to_hierarchy': name_to_hierarchy,
815
+ 'consumption_log': consumption_log,
816
+ 'total_input_token': total_input_token,
817
+ 'total_output_token': total_output_token,
818
+ }
819
+
820
+ with open(f'checkpoints/{args.model_name}_{args.poster_name}_ckpt_{args.index}.pkl', 'wb') as f:
821
+ pkl.dump(ckpt, f)
822
+
823
+ json.dump(
824
+ new_outline,
825
+ open(outline_file_path, "w"),
826
+ ensure_ascii=False,
827
+ indent=4,
828
+ )
829
+
830
+ return total_input_token, total_output_token
831
+
832
+ if __name__ == '__main__':
833
+ parser = argparse.ArgumentParser()
834
+ parser.add_argument('--poster_name', type=str, default=None)
835
+ parser.add_argument('--model_name', type=str, default='4o')
836
+ parser.add_argument('--poster_path', type=str, required=True)
837
+ parser.add_argument('--index', type=int, default=0)
838
+ parser.add_argument('--max_retry', type=int, default=3)
839
+ args = parser.parse_args()
840
+
841
+ actor_config = get_agent_config(args.model_name)
842
+ critic_config = get_agent_config(args.model_name)
843
+
844
+ if args.poster_name is None:
845
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
846
+
847
+ input_token, output_token = filter_image_table(args, actor_config)
848
+ print(f'Token consumption: {input_token} -> {output_token}')
849
+
850
+ input_token, output_token = gen_outline_layout(args, actor_config, critic_config)
851
+ print(f'Token consumption: {input_token} -> {output_token}')
Paper2Poster/PosterAgent/gen_outline_layout_parallel.py ADDED
@@ -0,0 +1,949 @@
1
+ from dotenv import load_dotenv
2
+ import os
3
+ import json
4
+ import copy
5
+ import yaml
6
+ import logging
7
+ import time
8
+ from jinja2 import Environment, StrictUndefined
9
+
10
+ from utils.src.utils import ppt_to_images, get_json_from_response
11
+
12
+ from camel.models import ModelFactory
13
+ from camel.agents import ChatAgent
14
+ from camel.messages import BaseMessage
15
+
16
+ from utils.pptx_utils import *
17
+ from utils.wei_utils import *
18
+
19
+ import pickle as pkl
20
+ import argparse
21
+ from concurrent.futures import ThreadPoolExecutor
22
+ from concurrent.futures import ProcessPoolExecutor, as_completed
23
+ import concurrent.futures
24
+ import sys
25
+
26
+ load_dotenv()
27
+
28
+ logging.basicConfig(
29
+ level=logging.DEBUG,
30
+ format='%(threadName)s: %(message)s',
31
+ stream=sys.stdout
32
+ )
33
+ logger = logging.getLogger(__name__)
34
+
35
+ IMAGE_SCALE_RATIO_MIN = 50
36
+ IMAGE_SCALE_RATIO_MAX = 40
37
+ TABLE_SCALE_RATIO_MIN = 100
38
+ TABLE_SCALE_RATIO_MAX = 80
39
+
40
+ def layout_process_section_wrapped(
41
+ sections,
42
+ new_outline,
43
+ init_template,
44
+ new_section_template,
45
+ init_actor_sys_msg,
46
+ new_section_actor_sys_msg,
47
+ actor_config,
48
+ documentation,
49
+ max_retry,
50
+ slide_width,
51
+ slide_height
52
+ ):
53
+ logs = {}
54
+ parallel_results = {}
55
+ total_input_token, total_output_token = 0, 0
56
+
57
+ # Run each section's layout generation concurrently in a thread pool
58
+ with ThreadPoolExecutor() as executor:
59
+ futures = []
60
+
61
+ for section_index in range(len(sections)):
62
+ if section_index == 0:
63
+ sys_msg = init_actor_sys_msg
64
+ prompt_template = init_template
65
+ else:
66
+ sys_msg = new_section_actor_sys_msg
67
+ prompt_template = new_section_template
68
+
69
+ actor_model = ModelFactory.create(
70
+ model_platform=actor_config['model_platform'],
71
+ model_type=actor_config['model_type'],
72
+ model_config_dict=actor_config['model_config'],
73
+ )
74
+
75
+ future = executor.submit(
76
+ layout_process_section,
77
+ section_index,
78
+ sections,
79
+ new_outline,
80
+ prompt_template,
81
+ documentation,
82
+ sys_msg,
83
+ actor_model,
84
+ 10,
85
+ max_retry,
86
+ slide_width,
87
+ slide_height
88
+ )
89
+ futures.append(future)
90
+
91
+ # Collect results as processes complete
92
+ for future in as_completed(futures):
93
+ section_index, section_logs, in_toks, out_toks = future.result()
94
+
95
+ # Store logs by section index
96
+ parallel_results[section_index] = section_logs
97
+
98
+ # Update token counters
99
+ total_input_token += in_toks
100
+ total_output_token += out_toks
101
+
102
+ # Merge results back into `logs`
103
+ for section_index, section_logs in parallel_results.items():
104
+ curr_section = sections[section_index]
105
+ logs[curr_section] = section_logs
106
+
107
+ return logs, total_input_token, total_output_token
108
+
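The wrapper above is a standard submit/as_completed fan-out, fan-in: each section is generated independently, results may arrive in any order, and they are re-keyed by section index afterwards. A stripped-down sketch of the same pattern (function names here are placeholders):

    from concurrent.futures import ThreadPoolExecutor, as_completed

    def fan_out(n_sections, work_fn):
        # work_fn(i) must return (i, result); completion order does not matter.
        results = {}
        with ThreadPoolExecutor() as executor:
            futures = [executor.submit(work_fn, i) for i in range(n_sections)]
            for future in as_completed(futures):
                i, result = future.result()
                results[i] = result
        return [results[i] for i in range(n_sections)]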
109
+ def create_agent_fn(sys_msg, agent_model, window_size=10):
110
+ agent = ChatAgent(
111
+ system_message=sys_msg,
112
+ model=agent_model,
113
+ message_window_size=window_size,
114
+ )
115
+ return agent
116
+
117
+ def layout_h2_process_section(
118
+ section,
119
+ outline_no_sub_locations,
120
+ h2_actor_template,
121
+ create_h2_actor_agent, # If you need a fresh agent for each thread
122
+ ):
123
+ """
124
+ Run the logic for a single section.
125
+ Returns a tuple containing:
126
+ - section name (or id),
127
+ - updated subsection-location dict,
128
+ - input token count,
129
+ - output token count
130
+ """
131
+ print(f'Generating h2 for section {section}...', flush=True)
132
+
133
+ # 1) Create the prompt
134
+ section_outline = {section: outline_no_sub_locations[section]}
135
+ section_jinja_args = {
136
+ 'section_outline': json.dumps(section_outline, indent=4),
137
+ }
138
+ section_prompt = h2_actor_template.render(**section_jinja_args)
139
+
140
+ # 2) Prepare a fresh agent or reuse existing (thread-safe?) agent
141
+ # If your h2_actor_agent is not thread-safe, instantiate a new one here:
142
+ h2_actor_agent = create_h2_actor_agent()
143
+ h2_actor_agent.reset()
144
+
145
+ # 3) Get response
146
+ response = h2_actor_agent.step(section_prompt)
147
+ input_token, output_token = account_token(response)
148
+
149
+ # 4) Parse JSON
150
+ subsection_location = get_json_from_response(response.msgs[0].content)
151
+
152
+ # 5) Create a dict from the sub-locations
153
+ sec_bbox = outline_no_sub_locations[section]['location']
154
+ subsection_location_dict = {}
155
+ for k, v in subsection_location.items():
156
+ subsection_location_dict[k] = {
157
+ 'left': v['location'][0],
158
+ 'top': v['location'][1],
159
+ 'width': v['location'][2],
160
+ 'height': v['location'][3]
161
+ }
162
+
163
+ # 6) Validate and possibly revise
164
+ is_valid, revised = validate_and_adjust_subsections(sec_bbox, subsection_location_dict)
165
+ if not is_valid:
166
+ # Try once more
167
+ is_valid, revised = validate_and_adjust_subsections(sec_bbox, revised)
168
+ assert is_valid, "Failed to adjust subsections to fit section"
169
+ final_sub_loc = revised
170
+ else:
171
+ final_sub_loc = subsection_location
172
+
173
+ # Return all data needed by the main thread
174
+ return section, final_sub_loc, input_token, output_token
175
+
176
+ def layout_process_section(
177
+ section_index,
178
+ sections,
179
+ new_outline,
180
+ new_section_template,
181
+ documentation,
182
+ sys_msg,
183
+ agent_model,
184
+ window_size,
185
+ max_retry,
186
+ slide_width,
187
+ slide_height
188
+ ):
189
+ """
190
+ Runs the 'gen_layout' logic for a single section_index.
191
+
192
+ Returns a tuple:
193
+ (section_index, updated_log, input_tokens, output_tokens)
194
+ """
195
+ curr_section = sections[section_index]
196
+ print(f'Generating h1 layout for section {curr_section}...')
197
+
198
+ # Build outline JSON just for current section
199
+ new_section_outline = {curr_section: new_outline[curr_section]}
200
+ if section_index == 0:
201
+ new_section_outline = {'meta': new_outline['meta'], curr_section: new_outline[curr_section]}
202
+ new_section_jinja_args = {
203
+ 'json_outline': new_section_outline,
204
+ 'function_docs': documentation,
205
+ 'file_name': f'poster_{section_index}.pptx'
206
+ }
207
+
208
+ # Render prompt
209
+ new_section_prompt = new_section_template.render(**new_section_jinja_args)
210
+
211
+ existing_code = '' # Or fetch from a stable location that is not dependent on real-time results
212
+
213
+ # Call gen_layout
214
+ section_logs = gen_layout_parallel(
215
+ create_agent_fn(
216
+ sys_msg,
217
+ agent_model,
218
+ window_size
219
+ ),
220
+ new_section_prompt,
221
+ max_retry,
222
+ existing_code=existing_code,
223
+ slide_width=slide_width,
224
+ slide_height=slide_height,
225
+ tmp_name=section_index
226
+ )
227
+
228
+ if section_logs[-1]['error'] is not None:
229
+ print(f'Failed to generate layout for section {curr_section}.')
230
+ return None
231
+
232
+ in_toks, out_toks = section_logs[-1]['cumulative_tokens']
233
+ return (section_index, section_logs, in_toks, out_toks)
234
+
235
+ def get_outline_location(outline, subsection=False):
236
+ outline_location = {}
237
+ for k, v in outline.items():
238
+ if k == 'meta':
239
+ continue
240
+ outline_location[k] = {
241
+ 'location': v['location'],
242
+ }
243
+ if subsection:
244
+ if 'subsections' in v:
245
+ outline_location[k]['subsections'] = get_outline_location(v['subsections'])
246
+ return outline_location
247
+
248
+ def apply_outline_location(outline, location, subsection=False):
249
+ new_outline = {}
250
+ for k, v in outline.items():
251
+ if k == 'meta':
252
+ new_outline[k] = v
253
+ continue
254
+ new_outline[k] = copy.deepcopy(v)
255
+ new_outline[k]['location'] = location[k]['location']
256
+ if subsection:
257
+ if 'subsections' in v:
258
+ new_outline[k]['subsections'] = apply_outline_location(v['subsections'], location[k]['subsections'])
259
+
260
+ return new_outline
261
+
262
+ def fill_location(outline, section_name, location_dict):
263
+ new_outline = copy.deepcopy(outline)
264
+ if 'subsections' not in new_outline[section_name]:
265
+ return new_outline
266
+ for k, v in new_outline[section_name]['subsections'].items():
267
+ v['location'] = location_dict[k]['location']
268
+ return new_outline
269
+
270
+ def recover_name_and_location(outline_no_name, outline):
271
+ new_outline = copy.deepcopy(outline_no_name)
272
+ for k, v in outline_no_name.items():
273
+ if k == 'meta':
274
+ continue
275
+ new_outline[k]['name'] = outline[k]['name']
276
+ if type(new_outline[k]['location']) == list:
277
+ new_outline[k]['location'] = {
278
+ 'left': v['location'][0],
279
+ 'top': v['location'][1],
280
+ 'width': v['location'][2],
281
+ 'height': v['location'][3]
282
+ }
283
+ if 'subsections' in v:
284
+ for k_sub, v_sub in v['subsections'].items():
285
+ new_outline[k]['subsections'][k_sub]['name'] = outline[k]['subsections'][k_sub]['name']
286
+ if type(new_outline[k]['subsections'][k_sub]['location']) == list:
287
+ new_outline[k]['subsections'][k_sub]['location'] = {
288
+ 'left': v_sub['location'][0],
289
+ 'top': v_sub['location'][1],
290
+ 'width': v_sub['location'][2],
291
+ 'height': v_sub['location'][3]
292
+ }
293
+ return new_outline
294
+
295
+
296
+ def validate_and_adjust_subsections(section_bbox, subsection_bboxes):
297
+ """
298
+ Validate that the given subsections collectively occupy the entire section.
299
+ If not, return an adjusted version that fixes the layout.
300
+
301
+ We assume all subsections are intended to be stacked vertically with no gaps,
302
+ spanning the full width of the section.
303
+
304
+ :param section_bbox: dict with keys ["left", "top", "width", "height"]
305
+ :param subsection_bboxes: dict of subsection_name -> bounding_box (each also
306
+ with keys ["left", "top", "width", "height"])
307
+ :return: (is_valid, revised_subsections)
308
+ where is_valid is True/False,
309
+ and revised_subsections is either the same as subsection_bboxes if valid,
310
+ or a new dict of adjusted bounding boxes if invalid.
311
+ """
312
+
313
+ # Helper functions
314
+ def _right(bbox):
315
+ return bbox["left"] + bbox["width"]
316
+
317
+ def _bottom(bbox):
318
+ return bbox["top"] + bbox["height"]
319
+
320
+ section_left = section_bbox["left"]
321
+ section_top = section_bbox["top"]
322
+ section_right = section_left + section_bbox["width"]
323
+ section_bottom = section_top + section_bbox["height"]
324
+
325
+ # Convert dictionary to a list of (subsection_name, bbox) pairs
326
+ items = list(subsection_bboxes.items())
327
+ if not items:
328
+ # No subsections is definitely not valid if we want to fill the section
329
+ return False, None
330
+
331
+ # Sort subsections by their 'top' coordinate
332
+ items_sorted = sorted(items, key=lambda x: x[1]["top"])
333
+
334
+ # ---------------------------
335
+ # Step 1: Validate
336
+ # ---------------------------
337
+ # We'll check:
338
+ # 1. left/right boundaries match the section for each subsection
339
+ # 2. The first subsection's top == section_top
340
+ # 3. The last subsection's bottom == section_bottom
341
+ # 4. Each pair of consecutive subsections lines up exactly
342
+ # (previous bottom == current top) with no gap or overlap.
343
+
344
+ is_valid = True
345
+
346
+ # Check left/right for each
347
+ for name, bbox in items_sorted:
348
+ if bbox["left"] != section_left or _right(bbox) != section_right:
349
+ is_valid = False
350
+ break
351
+
352
+ # Check alignment for the first and last
353
+ if is_valid:
354
+ first_sub_name, first_sub_bbox = items_sorted[0]
355
+ if first_sub_bbox["top"] != section_top:
356
+ is_valid = False
357
+
358
+ if is_valid:
359
+ last_sub_name, last_sub_bbox = items_sorted[-1]
360
+ if _bottom(last_sub_bbox) != section_bottom:
361
+ is_valid = False
362
+
363
+ # Check consecutive alignment
364
+ if is_valid:
365
+ for i in range(len(items_sorted) - 1):
366
+ _, current_bbox = items_sorted[i]
367
+ _, next_bbox = items_sorted[i + 1]
368
+ if _bottom(current_bbox) != next_bbox["top"]:
369
+ is_valid = False
370
+ break
371
+
372
+ # If everything passed, we return
373
+ if is_valid:
374
+ return True, subsection_bboxes
375
+
376
+ # ---------------------------
377
+ # Step 2: Revise
378
+ # ---------------------------
379
+ # We will adjust all subsection bboxes so that they occupy
380
+ # the entire section exactly, preserving each original bbox's
381
+ # height *ratio* if possible.
382
+
383
+ # 2a. Compute total original height (in the order of sorted items)
384
+ original_heights = [bbox["height"] for _, bbox in items_sorted]
385
+ total_original_height = sum(original_heights)
386
+
387
+ # Avoid divide-by-zero if somehow there's a 0 height
388
+ if total_original_height <= 0:
389
+ # Fallback: split the section equally among subsections
390
+ # to avoid zero or negative heights
391
+ chunk_height = section_bbox["height"] / len(items_sorted)
392
+ scale_heights = [chunk_height] * len(items_sorted)
393
+ else:
394
+ # Scale each original height by the ratio of
395
+ # (section total height / sum of original heights)
396
+ scale = section_bbox["height"] / total_original_height
397
+ scale_heights = [h * scale for h in original_heights]
398
+
399
+ # 2b. Assign bounding boxes top->bottom, ensuring no gap
400
+ revised = {}
401
+ current_top = section_top
402
+ for i, (name, original_bbox) in enumerate(items_sorted):
403
+ revised_height = scale_heights[i]
404
+ # If there's floating error, we can clamp in the last iteration
405
+ # so that the bottom exactly matches section_bottom.
406
+ # But for simplicity, we'll keep it straightforward unless needed.
407
+
408
+ revised[name] = {
409
+ "left": section_left,
410
+ "top": current_top,
411
+ "width": section_bbox["width"],
412
+ "height": revised_height
413
+ }
414
+ # Update current_top for next subsection
415
+ current_top += revised_height
416
+
417
+ # Due to potential float rounding, we can enforce the last subsection
418
+ # to exactly end at section_bottom:
419
+ last_name = items_sorted[-1][0]
420
+ # Recompute the actual bottom after the above assignment
421
+ new_bottom = revised[last_name]["top"] + revised[last_name]["height"]
422
+ diff = new_bottom - section_bottom
423
+ if abs(diff) > 1e-9:
424
+ # Adjust the last subsection's height
425
+ revised[last_name]["height"] -= diff
426
+
427
+ # Return the revised dictionary
428
+ return False, revised
429
+
430
+ def filter_image_table(args, filter_config):
431
+ images = json.load(open(f'images_and_tables/{args.poster_name}_images.json', 'r'))
432
+ tables = json.load(open(f'images_and_tables/{args.poster_name}_tables.json', 'r'))
433
+ doc_json = json.load(open(f'contents/{args.model_name}_{args.poster_name}_raw_content.json', 'r'))
434
+ agent_filter = 'image_table_filter_agent'
435
+ with open(f"prompt_templates/{agent_filter}.yaml", "r") as f:
436
+ config_filter = yaml.safe_load(f)
437
+
438
+ image_information = {}
439
+ for k, v in images.items():
440
+ image_information[k] = copy.deepcopy(v)
441
+ image_information[k]['min_width'] = v['width'] // IMAGE_SCALE_RATIO_MIN
442
+ image_information[k]['min_height'] = v['height'] // IMAGE_SCALE_RATIO_MIN
443
+ image_information[k]['max_width'] = v['width'] // IMAGE_SCALE_RATIO_MAX
444
+ image_information[k]['max_height'] = v['height'] // IMAGE_SCALE_RATIO_MAX
445
+
446
+ table_information = {}
447
+ for k, v in tables.items():
448
+ table_information[k] = copy.deepcopy(v)
449
+ table_information[k]['min_width'] = v['width'] // TABLE_SCALE_RATIO_MIN
450
+ table_information[k]['min_height'] = v['height'] // TABLE_SCALE_RATIO_MIN
451
+ table_information[k]['max_width'] = v['width'] // TABLE_SCALE_RATIO_MAX
452
+ table_information[k]['max_height'] = v['height'] // TABLE_SCALE_RATIO_MAX
453
+
454
+ filter_actor_sys_msg = config_filter['system_prompt']
455
+
456
+ filter_model = ModelFactory.create(
457
+ model_platform=filter_config['model_platform'],
458
+ model_type=filter_config['model_type'],
459
+ model_config_dict=filter_config['model_config'],
460
+ )
461
+ filter_actor_agent = ChatAgent(
462
+ system_message=filter_actor_sys_msg,
463
+ model=filter_model,
464
+ message_window_size=10, # [Optional] the length for chat memory
465
+ )
466
+
467
+ filter_jinja_args = {
468
+ 'json_content': doc_json,
469
+ 'table_information': table_information,
470
+ 'image_information': image_information,
471
+ }
472
+ jinja_env = Environment(undefined=StrictUndefined)
473
+ filter_prompt = jinja_env.from_string(config_filter["template"])
474
+ response = filter_actor_agent.step(filter_prompt.render(**filter_jinja_args))
475
+ input_token, output_token = account_token(response)
476
+ response_json = get_json_from_response(response.msgs[0].content)
477
+ table_information = response_json['table_information']
478
+ image_information = response_json['image_information']
479
+ json.dump(images, open(f'images_and_tables/{args.poster_name}_images_filtered.json', 'w'), indent=4)
480
+ json.dump(tables, open(f'images_and_tables/{args.poster_name}_tables_filtered.json', 'w'), indent=4)
481
+
482
+ return input_token, output_token
483
+
484
+ def gen_outline_layout(args, actor_config, critic_config):
485
+ poster_log_path = f'log/{args.model_name}_{args.poster_name}_poster_{args.index}'
486
+ if not os.path.exists(poster_log_path):
487
+ os.mkdir(poster_log_path)
488
+ total_input_token, total_output_token = 0, 0
489
+ consumption_log = {
490
+ 'outline': [],
491
+ 'h1_actor': [],
492
+ 'h2_actor': [],
493
+ 'h1_critic': [],
494
+ 'gen_layout': []
495
+ }
496
+ jinja_env = Environment(undefined=StrictUndefined)
497
+ outline_file_path = f'outlines/{args.model_name}_{args.poster_name}_outline_{args.index}.json'
498
+ agent_name = 'poster_planner_new'
499
+ agent_init_name = 'layout_agent_init_parallel'
500
+ agent_new_section_name = 'layout_agent_new_section_parallel'
501
+ h1_critic_name = 'critic_layout_hierarchy_1'
502
+ h2_actor_name = 'actor_layout_hierarchy_2'
503
+
504
+ doc_json = json.load(open(f'contents/{args.model_name}_{args.poster_name}_raw_content.json', 'r'))
505
+ filtered_table_information = json.load(open(f'images_and_tables/{args.poster_name}_tables_filtered.json', 'r'))
506
+ filtered_image_information = json.load(open(f'images_and_tables/{args.poster_name}_images_filtered.json', 'r'))
507
+
508
+ with open(f"prompt_templates/{agent_name}.yaml", "r") as f:
509
+ planner_config = yaml.safe_load(f)
510
+
511
+ with open(f"prompt_templates/{agent_init_name}.yaml", "r") as f:
512
+ config_init = yaml.safe_load(f)
513
+
514
+ with open(f"prompt_templates/{agent_new_section_name}.yaml", "r") as f:
515
+ config_new_section = yaml.safe_load(f)
516
+
517
+ with open(f"prompt_templates/{h1_critic_name}.yaml", "r") as f:
518
+ config_h1_critic = yaml.safe_load(f)
519
+
520
+ with open(f"prompt_templates/{h2_actor_name}.yaml", "r") as f:
521
+ config_h2_actor = yaml.safe_load(f)
522
+
523
+ planner_model = ModelFactory.create(
524
+ model_platform=actor_config['model_platform'],
525
+ model_type=actor_config['model_type'],
526
+ model_config_dict=actor_config['model_config'],
527
+ )
528
+
529
+ planner_agent = ChatAgent(
530
+ system_message=planner_config['system_prompt'],
531
+ model=planner_model,
532
+ message_window_size=10,
533
+ )
534
+
535
+ outline_template = jinja_env.from_string(planner_config["template"])
536
+
537
+ planner_jinja_args = {
538
+ 'json_content': doc_json,
539
+ 'table_information': filtered_table_information,
540
+ 'image_information': filtered_image_information,
541
+ }
542
+
543
+ actor_model = ModelFactory.create(
544
+ model_platform=actor_config['model_platform'],
545
+ model_type=actor_config['model_type'],
546
+ model_config_dict=actor_config['model_config'],
547
+ )
548
+
549
+ init_actor_sys_msg = config_init['system_prompt']
550
+
551
+ def create_init_actor_agent():
552
+ actor_model = ModelFactory.create(
553
+ model_platform=actor_config['model_platform'],
554
+ model_type=actor_config['model_type'],
555
+ model_config_dict=actor_config['model_config'],
556
+ )
557
+ init_actor_agent = ChatAgent(
558
+ system_message=init_actor_sys_msg,
559
+ model=actor_model,
560
+ message_window_size=10,
561
+ )
562
+ return init_actor_agent
563
+
564
+ new_section_actor_sys_msg = config_new_section['system_prompt']
565
+
566
+ def create_new_section_actor_agent():
567
+ actor_model = ModelFactory.create(
568
+ model_platform=actor_config['model_platform'],
569
+ model_type=actor_config['model_type'],
570
+ model_config_dict=actor_config['model_config'],
571
+ )
572
+ new_section_actor_agent = ChatAgent(
573
+ system_message=new_section_actor_sys_msg,
574
+ model=actor_model,
575
+ message_window_size=10,
576
+ )
577
+ return new_section_actor_agent
578
+
579
+ h1_critic_model = ModelFactory.create(
580
+ model_platform=critic_config['model_platform'],
581
+ model_type=critic_config['model_type'],
582
+ model_config_dict=critic_config['model_config'],
583
+ )
584
+
585
+ h1_critic_sys_msg = config_h1_critic['system_prompt']
586
+
587
+ h1_critic_agent = ChatAgent(
588
+ system_message=h1_critic_sys_msg,
589
+ model=h1_critic_model,
590
+ message_window_size=None,
591
+ )
592
+
593
+ h1_pos_example = Image.open('h1_example/h1_pos.jpg')
594
+ h1_neg_example = Image.open('h1_example/h1_neg.jpg')
595
+
596
+ h2_actor_model = ModelFactory.create(
597
+ model_platform=actor_config['model_platform'],
598
+ model_type=actor_config['model_type'],
599
+ model_config_dict=actor_config['model_config'],
600
+ )
601
+
602
+ h2_actor_sys_msg = config_h2_actor['system_prompt']
603
+
604
+ def create_h2_actor_agent():
605
+ h2_actor_model = ModelFactory.create(
606
+ model_platform=actor_config['model_platform'],
607
+ model_type=actor_config['model_type'],
608
+ model_config_dict=actor_config['model_config'],
609
+ )
610
+ h2_actor_agent = ChatAgent(
611
+ system_message=h2_actor_sys_msg,
612
+ model=h2_actor_model,
613
+ message_window_size=10,
614
+ )
615
+ return h2_actor_agent
616
+
617
+
618
+ init_template = jinja_env.from_string(config_init["template"])
619
+ new_section_template = jinja_env.from_string(config_new_section["template"])
620
+ h1_critic_template = jinja_env.from_string(config_h1_critic["template"])
621
+
622
+ attempt = 0
623
+ while True:
624
+ print(f'Generating outline attempt {attempt}...', flush=True)
625
+ planner_prompt = outline_template.render(**planner_jinja_args)
626
+ planner_agent.reset()
627
+ response = planner_agent.step(planner_prompt)
628
+ outline = get_json_from_response(response.msgs[0].content)
629
+ input_token, output_token = account_token(response)
630
+ sections = list(outline.keys())
631
+ sections = [x for x in sections if x != 'meta']
632
+ slide_width = outline['meta']['width']
633
+ slide_height = outline['meta']['height']
634
+ name_to_hierarchy = get_hierarchy(outline)
635
+ consumption_log['outline'].append((input_token, output_token))
636
+ total_input_token += input_token
637
+ total_output_token += output_token
638
+ init_outline = {'meta': outline['meta'], sections[0]: outline[sections[0]]}
639
+
640
+ new_outline = outline
641
+
642
+ init_jinja_args = {
643
+ 'json_outline': init_outline,
644
+ 'function_docs': documentation
645
+ }
646
+
647
+ init_prompt = init_template.render(**init_jinja_args)
648
+
649
+ # hierarchy 1 only
650
+ outline_location = get_outline_location(outline, subsection=False)
651
+
652
+ logs, layout_cumulative_input_token, layout_cumulative_output_token = layout_process_section_wrapped(
653
+ sections,
654
+ new_outline,
655
+ init_template,
656
+ new_section_template,
657
+ init_actor_sys_msg,
658
+ new_section_actor_sys_msg,
659
+ actor_config,
660
+ documentation,
661
+ args.max_retry,
662
+ slide_width,
663
+ slide_height
664
+ )
665
+
666
+ concatenated_code = utils_functions
667
+ for section_index in range(len(sections)):
668
+ section = sections[section_index]
669
+ concatenated_code += '\n' + logs[section][-1]['code']
670
+ presentation_object_name = logs[section][-1]['output'].replace('\n', '')
671
+ concatenated_code += '\n' + f'save_presentation({presentation_object_name}, file_name="poster_{section_index + 1}.pptx")'
672
+
673
+ concatenated_code += f'''
674
+ name_to_hierarchy = {name_to_hierarchy}
675
+ identifier = "parallel"
676
+ poster_path = "poster_{section_index + 1}.pptx"
677
+ get_visual_cues(name_to_hierarchy, identifier, poster_path)
678
+ '''
679
+ output, error = run_code_with_utils(concatenated_code, utils_functions)
680
+ if error is not None:
681
+ print(error, flush=True)
682
+ attempt += 1
683
+ continue
684
+
685
+ consumption_log['h1_actor'].append((layout_cumulative_input_token, layout_cumulative_output_token))
686
+ total_input_token += layout_cumulative_input_token
687
+ total_output_token += layout_cumulative_output_token
688
+
689
+ h1_path = f'tmp/poster_<parallel>_hierarchy_1.pptx'
690
+ h2_path = f'tmp/poster_<parallel>_hierarchy_2.pptx'
691
+
692
+ h1_filled_path = f'tmp/poster_<parallel>_hierarchy_1_filled.pptx'
693
+ h2_filled_path = f'tmp/poster_<parallel>_hierarchy_2_filled.pptx'
694
+
695
+ ppt_to_images(h1_path, 'tmp/layout_h1')
696
+ ppt_to_images(h2_path, 'tmp/layout_h2')
697
+ ppt_to_images(h1_filled_path, 'tmp/layout_h1_filled')
698
+ ppt_to_images(h2_filled_path, 'tmp/layout_h2_filled')
699
+
700
+ h1_img = Image.open('tmp/layout_h1/slide_0001.jpg')
701
+ h2_img = Image.open('tmp/layout_h2/slide_0001.jpg')
702
+ h1_filled_img = Image.open('tmp/layout_h1_filled/slide_0001.jpg')
703
+ h2_filled_img = Image.open('tmp/layout_h2_filled/slide_0001.jpg')
704
+
705
+ h1_critic_msg = BaseMessage.make_user_message(
706
+ role_name='User',
707
+ content=h1_critic_template.render(),
708
+ image_list=[h1_neg_example, h1_pos_example, h1_filled_img]
709
+ )
710
+
711
+ outline_bbox_dict = {}
712
+ for k, v in outline_location.items():
713
+ outline_bbox_dict[k] = v['location']
714
+
715
+ bbox_check_result = check_bounding_boxes(
716
+ outline_bbox_dict,
717
+ new_outline['meta']['width'],
718
+ new_outline['meta']['height']
719
+ )
720
+
721
+ if len(bbox_check_result) != 0:
722
+ print(bbox_check_result, flush=True)
723
+ attempt += 1
724
+ continue
725
+
726
+ h1_critic_agent.reset()
727
+ response = h1_critic_agent.step(h1_critic_msg)
728
+ input_token, output_token = account_token(response)
729
+ consumption_log['h1_critic'].append((input_token, output_token))
730
+ total_input_token += input_token
731
+ total_output_token += output_token
732
+ if response.msgs[0].content == 'T':
733
+ print('Blank area detected.', flush=True)
734
+ attempt += 1
735
+ continue
736
+
737
+ print('Successfully generated outline.', flush=True)
738
+
739
+ break
740
+
741
+ outline_bbox_dict = {}
742
+ for k, v in outline_location.items():
743
+ outline_bbox_dict[k] = v['location']
744
+
745
+ # Generate subsection locations
746
+ outline_no_sub_locations = copy.deepcopy(new_outline)
747
+ if 'meta' in outline_no_sub_locations:
748
+ outline_no_sub_locations.pop('meta')
749
+
750
+ for k, v in outline_no_sub_locations.items():
751
+ if 'subsections' in v:
752
+ subsections = v['subsections']
753
+ for k_sub, v_sub in subsections.items():
754
+ del v_sub['location']
755
+ del v_sub['name']
756
+
757
+ h2_actor_template = jinja_env.from_string(config_h2_actor["template"])
758
+
759
+ h2_cumulative_input_token = 0
760
+ h2_cumulative_output_token = 0
761
+
762
+ updated_sections = []
763
+
764
+ with ThreadPoolExecutor() as executor:
765
+ # Kick off all tasks
766
+ future_to_section = {
767
+ executor.submit(
768
+ layout_h2_process_section,
769
+ section,
770
+ outline_no_sub_locations,
771
+ h2_actor_template,
772
+ create_h2_actor_agent # pass the factory function
773
+ ): section
774
+ for section in sections
775
+ }
776
+
777
+ # Gather results as they complete
778
+ for future in concurrent.futures.as_completed(future_to_section):
779
+ section = future_to_section[future]
780
+ sec, final_sub_loc, in_toks, out_toks = future.result()
781
+
782
+ # Accumulate token usage
783
+ h2_cumulative_input_token += in_toks
784
+ h2_cumulative_output_token += out_toks
785
+
786
+ # Stash the final sub-loc for merging
787
+ updated_sections.append((sec, final_sub_loc))
788
+
789
+ # Now merge each updated subsection location back into outline_no_sub_locations
790
+ for (section, final_sub_loc) in updated_sections:
791
+ outline_no_sub_locations = fill_location(
792
+ outline_no_sub_locations,
793
+ section,
794
+ final_sub_loc
795
+ )
796
+
797
+ consumption_log['h2_actor'].append((h2_cumulative_input_token, h2_cumulative_output_token))
798
+ total_input_token += h2_cumulative_input_token
799
+ total_output_token += h2_cumulative_output_token
800
+
801
+ outline_no_sub_locations['meta'] = outline['meta']
802
+ outline_no_sub_locations_with_name = recover_name_and_location(outline_no_sub_locations, new_outline)
803
+ new_outline = outline_no_sub_locations_with_name
804
+
805
+ ### Outline finalized, actually generate layout
806
+
807
+ logs = {}
808
+
809
+ gen_layout_cumulative_input_token = 0
810
+ gen_layout_cumulative_output_token = 0
811
+ init_outline = {'meta': outline['meta'], sections[0]: outline[sections[0]]}
812
+
813
+ new_outline = outline
814
+
815
+ init_jinja_args = {
816
+ 'json_outline': init_outline,
817
+ 'function_docs': documentation
818
+ }
819
+
820
+ outline_location = get_outline_location(outline, subsection=False)
821
+ logs = {}
822
+
823
+ # We'll store all updated logs here, keyed by section_index.
824
+ parallel_results = {}
825
+
826
+ with concurrent.futures.ThreadPoolExecutor() as executor:
827
+ futures = []
828
+ for section_index in range(len(sections)):
829
+ if section_index == 0:
830
+ create_agent_fn = create_init_actor_agent
831
+ prompt_template = init_template
832
+ else:
833
+ create_agent_fn = create_new_section_actor_agent
834
+ prompt_template = new_section_template
835
+ future = executor.submit(
836
+ layout_process_section,
837
+ section_index,
838
+ sections,
839
+ new_outline,
840
+ prompt_template,
841
+ documentation,
842
+ create_agent_fn,
843
+ args.max_retry,
844
+ name_to_hierarchy,
845
+ slide_width,
846
+ slide_height
847
+ )
848
+ futures.append(future)
849
+
850
+ # Collect the results as they come in
851
+ for future in concurrent.futures.as_completed(futures):
852
+ try:
853
+ section_index, section_logs, in_toks, out_toks = future.result()
854
+
855
+ # Store these logs in a dictionary keyed by the section index
856
+ parallel_results[section_index] = section_logs
857
+
858
+ # Update token counters
859
+ gen_layout_cumulative_input_token += in_toks
860
+ gen_layout_cumulative_output_token += out_toks
861
+
862
+ except Exception as exc:
863
+ print(f"[ERROR] A section failed: {exc}", flush=True)
864
+ # Possibly re-raise if you want to stop everything on error
865
+ # raise
866
+
867
+ # After all tasks complete, merge the results back into `logs`
868
+ for section_index, section_logs in parallel_results.items():
869
+ curr_section = sections[section_index]
870
+ logs[curr_section] = section_logs
871
+
872
+ concatenated_code = utils_functions
873
+ for section_index in range(len(sections)):
874
+ section = sections[section_index]
875
+ concatenated_code += '\n' + logs[section][-1]['code']
876
+ concatenated_code += '\n' + f'save_presentation(presentation, file_name="poster_{section_index + 1}.pptx")'
877
+
878
+ concatenated_code += f'''
879
+ name_to_hierarchy = {name_to_hierarchy}
880
+ identifier = "parallel"
881
+ poster_path = "poster_{section_index + 1}.pptx"
882
+ get_visual_cues(name_to_hierarchy, identifier, poster_path)
883
+ '''
884
+ output, error = run_code(concatenated_code)
885
+ if error is not None:
886
+ print(f'Failed to generate layout: {error}', flush=True)
887
+
888
+ consumption_log['h1_actor'].append((gen_layout_cumulative_input_token, gen_layout_cumulative_output_token))
889
+ total_input_token += gen_layout_cumulative_input_token
890
+ total_output_token += gen_layout_cumulative_output_token
891
+
892
+ h1_path = f'tmp/poster_<parallel>_hierarchy_1.pptx'
893
+ h2_path = f'tmp/poster_<parallel>_hierarchy_2.pptx'
894
+
895
+ h1_filled_path = f'tmp/poster_<parallel>_hierarchy_1_filled.pptx'
896
+ h2_filled_path = f'tmp/poster_<parallel>_hierarchy_2_filled.pptx'
897
+
898
+ ppt_to_images(h1_path, 'tmp/layout_h1')
899
+ ppt_to_images(h2_path, 'tmp/layout_h2')
900
+ ppt_to_images(h1_filled_path, 'tmp/layout_h1_filled')
901
+ ppt_to_images(h2_filled_path, 'tmp/layout_h2_filled')
902
+
903
+ h1_img = Image.open('tmp/layout_h1/slide_0001.jpg')
904
+ h2_img = Image.open('tmp/layout_h2/slide_0001.jpg')
905
+ h1_filled_img = Image.open('tmp/layout_h1_filled/slide_0001.jpg')
906
+ h2_filled_img = Image.open('tmp/layout_h2_filled/slide_0001.jpg')
907
+
908
+
909
+ ckpt = {
910
+ 'logs': logs,
911
+ 'outline': new_outline,
912
+ 'name_to_hierarchy': name_to_hierarchy,
913
+ 'consumption_log': consumption_log,
914
+ 'total_input_token': total_input_token,
915
+ 'total_output_token': total_output_token,
916
+ }
917
+
918
+ with open(f'checkpoints/{args.model_name}_{args.poster_name}_ckpt_{args.index}.pkl', 'wb') as f:
919
+ pkl.dump(ckpt, f)
920
+
921
+ json.dump(
922
+ new_outline,
923
+ open(outline_file_path, "w"),
924
+ ensure_ascii=False,
925
+ indent=4,
926
+ )
927
+
928
+ return total_input_token, total_output_token
929
+
930
+ if __name__ == '__main__':
931
+ parser = argparse.ArgumentParser()
932
+ parser.add_argument('--poster_name', type=str, default=None)
933
+ parser.add_argument('--model_name', type=str, default='4o')
934
+ parser.add_argument('--poster_path', type=str, required=True)
935
+ parser.add_argument('--index', type=int, default=0)
936
+ parser.add_argument('--max_retry', type=int, default=3)
937
+ args = parser.parse_args()
938
+
939
+ actor_config = get_agent_config(args.model_name)
940
+ critic_config = get_agent_config(args.model_name)
941
+
942
+ if args.poster_name is None:
943
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
944
+
945
+ input_token, output_token = filter_image_table(args, actor_config)
946
+ print(f'Token consumption: {input_token} -> {output_token}', flush=True)
947
+
948
+ input_token, output_token = gen_outline_layout(args, actor_config, critic_config)
949
+ print(f'Token consumption: {input_token} -> {output_token}', flush=True)
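Note on the pattern above: both hierarchy passes follow the same fan-out/fan-in shape — submit one agent task per section to a ThreadPoolExecutor, collect results keyed by section (or section index) as they complete, then merge them back in the original order while accumulating token counts. A minimal, self-contained sketch of that shape, with process_section standing in for layout_process_section (the names and return values here are illustrative only, not this module's actual API):

from concurrent.futures import ThreadPoolExecutor, as_completed

def process_section(index, name):
    # Stand-in for an agent call; returns (index, logs, input_tokens, output_tokens).
    return index, [f"code for {name}"], 10, 5

sections = ["Introduction", "Method", "Results"]
results, total_in, total_out = {}, 0, 0

with ThreadPoolExecutor() as executor:
    futures = {executor.submit(process_section, i, s): i for i, s in enumerate(sections)}
    for future in as_completed(futures):
        idx, logs, in_toks, out_toks = future.result()
        results[idx] = logs          # keyed by index so the original ordering is recoverable
        total_in += in_toks
        total_out += out_toks

# Merge back in the original section order, as the pipeline does before concatenating code.
ordered_logs = {sections[i]: results[i] for i in range(len(sections))}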
Paper2Poster/PosterAgent/gen_poster_content.py ADDED
@@ -0,0 +1,529 @@
1
+ import tempfile
2
+ import shutil
3
+ from dotenv import load_dotenv
4
+ from utils.src.utils import get_json_from_response
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ import json
7
+
8
+ from camel.models import ModelFactory
9
+ from PosterAgent.gen_pptx_code import generate_poster_code
10
+ from camel.agents import ChatAgent
11
+ from camel.messages import BaseMessage
12
+ from utils.src.utils import ppt_to_images
13
+ from PIL import Image
14
+
15
+ from utils.wei_utils import *
16
+
17
+ from utils.pptx_utils import *
18
+ from utils.critic_utils import *
19
+ import yaml
20
+ from jinja2 import Environment, StrictUndefined
21
+ import argparse
22
+
23
+ load_dotenv()
24
+ MAX_ATTEMPT = 10
25
+
26
+ def gen_content_process_section(
27
+ section_name,
28
+ outline,
29
+ raw_content,
30
+ raw_outline,
31
+ template,
32
+ create_actor_agent,
33
+ MAX_ATTEMPT
34
+ ):
35
+ """
36
+ Process a single section in its own thread or process.
37
+ Returns (section_name, result_json, total_input_token, total_output_token).
38
+ """
39
+ # Create a fresh ActorAgent instance for each parallel call
40
+ actor_agent = create_actor_agent()
41
+
42
+ section_outline = ''
43
+ num_attempts = 0
44
+ total_input_token = 0
45
+ total_output_token = 0
46
+ result_json = None
47
+
48
+ while True:
49
+ print(f"[Thread] Generating content for section: {section_name}")
50
+
51
+ if len(section_outline) == 0:
52
+ # Initialize the section outline
53
+ section_outline = json.dumps(outline[section_name], indent=4)
54
+
55
+ # Render prompt using Jinja template
56
+ jinja_args = {
57
+ 'json_outline': section_outline,
58
+ 'json_content': raw_content,
59
+ }
60
+ prompt = template.render(**jinja_args)
61
+
62
+ # Step the actor_agent and track tokens
63
+ response = actor_agent.step(prompt)
64
+ input_token, output_token = account_token(response)
65
+ total_input_token += input_token
66
+ total_output_token += output_token
67
+
68
+ # Parse JSON and possibly adjust text length
69
+ result_json = get_json_from_response(response.msgs[0].content)
70
+ new_section_outline, suggested = generate_length_suggestions(
71
+ result_json,
72
+ json.dumps(outline[section_name]),
73
+ raw_outline[section_name]
74
+ )
75
+ section_outline = json.dumps(new_section_outline, indent=4)
76
+
77
+ if not suggested:
78
+ # No more adjustments needed
79
+ break
80
+
81
+ print(f"[Thread] Adjusting text length for section: {section_name}...")
82
+
83
+ num_attempts += 1
84
+ if num_attempts >= MAX_ATTEMPT:
85
+ break
86
+
87
+ return section_name, result_json, total_input_token, total_output_token
88
+
89
+
90
+ def gen_content_parallel_process_sections(
91
+ sections,
92
+ outline,
93
+ raw_content,
94
+ raw_outline,
95
+ template,
96
+ create_actor_agent,
97
+ MAX_ATTEMPT=3
98
+ ):
99
+ """
100
+ Parallelize the section processing using ThreadPoolExecutor.
101
+ """
102
+ poster_content = {}
103
+ total_input_token = 0
104
+ total_output_token = 0
105
+
106
+ # Create a pool of worker threads (or processes)
107
+ with ThreadPoolExecutor() as executor:
108
+ futures = []
109
+
110
+ # Submit each section to be processed in parallel
111
+ for section_name in sections:
112
+ futures.append(
113
+ executor.submit(
114
+ gen_content_process_section,
115
+ section_name,
116
+ outline,
117
+ raw_content,
118
+ raw_outline,
119
+ template,
120
+ create_actor_agent,
121
+ MAX_ATTEMPT
122
+ )
123
+ )
124
+
125
+ # Collect results as they complete
126
+ for future in as_completed(futures):
127
+ section_name, result_json, sec_input_token, sec_output_token = future.result()
128
+ poster_content[section_name] = result_json
129
+ total_input_token += sec_input_token
130
+ total_output_token += sec_output_token
131
+
132
+ return poster_content, total_input_token, total_output_token
133
+
134
+ def render_textbox(text_arrangement, textbox_content, tmp_dir):
135
+ arrangement = copy.deepcopy(text_arrangement)
136
+ arrangement['x'] = 1
137
+ arrangement['y'] = 1
138
+
139
+ poster_code = generate_poster_code(
140
+ [],
141
+ [arrangement],
142
+ [],
143
+ presentation_object_name='poster_presentation',
144
+ slide_object_name='poster_slide',
145
+ utils_functions=utils_functions,
146
+ slide_width=text_arrangement['width'] + 3,
147
+ slide_height=text_arrangement['height'] + 3,
148
+ img_path='placeholder.jpg',
149
+ save_path=f'{tmp_dir}/poster.pptx',
150
+ visible=True,
151
+ content=textbox_content,
152
+ check_overflow=True,
153
+ tmp_dir=tmp_dir,
154
+ )
155
+
156
+ output, err = run_code(poster_code)
157
+ ppt_to_images(f'{tmp_dir}/poster.pptx', tmp_dir, output_type='jpg')
158
+ img = Image.open(f'{tmp_dir}/poster.jpg')
159
+
160
+ return img
161
+
162
+ def gen_poster_title_content(args, actor_config):
163
+ total_input_token, total_output_token = 0, 0
164
+ raw_content = json.load(open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_raw_content.json', 'r'))
165
+ actor_agent_name = 'poster_title_agent'
166
+
167
+ title_string = raw_content['meta']
168
+
169
+ with open(f'utils/prompt_templates/{actor_agent_name}.yaml', "r") as f:
170
+ content_config = yaml.safe_load(f)
171
+ jinja_env = Environment(undefined=StrictUndefined)
172
+ template = jinja_env.from_string(content_config["template"])
173
+
174
+ if args.model_name_t == 'vllm_qwen':
175
+ actor_model = ModelFactory.create(
176
+ model_platform=actor_config['model_platform'],
177
+ model_type=actor_config['model_type'],
178
+ model_config_dict=actor_config['model_config'],
179
+ url=actor_config['url'],
180
+ )
181
+ else:
182
+ actor_model = ModelFactory.create(
183
+ model_platform=actor_config['model_platform'],
184
+ model_type=actor_config['model_type'],
185
+ model_config_dict=actor_config['model_config']
186
+ )
187
+
188
+ actor_sys_msg = content_config['system_prompt']
189
+ actor_agent = ChatAgent(
190
+ system_message=actor_sys_msg,
191
+ model=actor_model,
192
+ message_window_size=30
193
+ )
194
+
195
+ jinja_args = {
196
+ 'title_string': title_string,
197
+ 'title_font_size': getattr(args, 'poster_title_font_size', None) or getattr(args, 'title_font_size', None),
198
+ 'author_font_size': getattr(args, 'poster_author_font_size', None) or getattr(args, 'author_font_size', None),
199
+ }
200
+ prompt = template.render(**jinja_args)
201
+ # Step the actor_agent and track tokens
202
+ actor_agent.reset()
203
+ response = actor_agent.step(prompt)
204
+ input_token, output_token = account_token(response)
205
+ total_input_token += input_token
206
+ total_output_token += output_token
207
+ result_json = get_json_from_response(response.msgs[0].content)
208
+
209
+ return result_json, total_input_token, total_output_token
210
+
211
+ def gen_bullet_point_content(args, actor_config, critic_config, agent_modify=True, tmp_dir='tmp'):
212
+ import json, yaml, copy, threading
213
+ from concurrent.futures import ThreadPoolExecutor, as_completed
214
+ from PIL import Image
215
+ from jinja2 import Environment, StrictUndefined
216
+
217
+ # ----------------------- Load data & configs -----------------------
218
+ total_input_token_t = total_output_token_t = 0
219
+ total_input_token_v = total_output_token_v = 0
220
+
221
+ raw_content = json.load(open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_raw_content.json', 'r'))
222
+ with open(f'tree_splits/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_tree_split_{args.index}.json', 'r') as f:
223
+ tree_split_results = json.load(f)
224
+
225
+ panels = tree_split_results['panels']
226
+ text_arrangement_list = tree_split_results['text_arrangement_inches']
227
+
228
+ actor_agent_name = 'bullet_point_agent'
229
+ if args.model_name_v == 'vllm_qwen_vl':
230
+ critic_agent_name = 'critic_overlap_agent_v3_short'
231
+ else:
232
+ critic_agent_name = 'critic_overlap_agent_v3'
233
+
234
+ with open(f"utils/prompt_templates/{actor_agent_name}.yaml", "r") as f:
235
+ content_config = yaml.safe_load(f)
236
+ with open(f"utils/prompt_templates/{critic_agent_name}.yaml", "r") as f:
237
+ critic_content_config = yaml.safe_load(f)
238
+
239
+ jinja_env = Environment(undefined=StrictUndefined)
240
+ template = jinja_env.from_string(content_config["template"])
241
+ critic_template = jinja_env.from_string(critic_content_config["template"])
242
+
243
+ # Preload images once (each worker can reopen if needed, or just pass paths)
244
+ neg_img_path = 'assets/overflow_example_v2/neg.jpg'
245
+ pos_img_path = 'assets/overflow_example_v2/pos.jpg'
246
+
247
+ # Group text arrangements by panel_id for O(1) lookup in workers
248
+ from collections import defaultdict
249
+ textboxes_by_panel = defaultdict(list)
250
+ for ta in text_arrangement_list:
251
+ textboxes_by_panel[ta['panel_id']].append(ta)
252
+ # Ensure deterministic order inside each panel
253
+ for k in textboxes_by_panel:
254
+ textboxes_by_panel[k] = sorted(textboxes_by_panel[k], key=lambda x: x.get('textbox_id', 0))
255
+
256
+ # ----------------------- Worker (defined INSIDE main fn) -----------------------
257
+ def _process_section(i):
258
+ """
259
+ Returns:
260
+ (i, result_json, t_in, t_out, v_in, v_out)
261
+ """
262
+ local_t_in = local_t_out = 0
263
+ local_v_in = local_v_out = 0
264
+
265
+ arrangement = panels[i]
266
+ num_textboxes = 2 if arrangement.get('gp', 0) > 0 else 1
267
+
268
+ local_tmp_dir = tempfile.mkdtemp(prefix=f"sec_{i}_", dir=tmp_dir)
269
+
270
+ jinja_args = {
271
+ 'summary_of_section': raw_content['sections'][i]['content'],
272
+ 'number_of_textboxes': num_textboxes,
273
+ 'section_title': raw_content['sections'][i]['title'],
274
+ 'bullet_font_size': args.bullet_font_size,
275
+ 'section_title_font_size': args.section_title_font_size,
276
+ }
277
+
278
+ target_textboxes = textboxes_by_panel[i][1:] # skip first (section title)
279
+ total_expected_length = sum(tb['num_chars'] for tb in target_textboxes)
280
+
281
+ # Create fresh models & agents per thread for safety
282
+ if args.model_name_t.startswith('vllm_qwen'):
283
+ actor_model = ModelFactory.create(
284
+ model_platform=actor_config['model_platform'],
285
+ model_type=actor_config['model_type'],
286
+ model_config_dict=actor_config['model_config'],
287
+ url=actor_config['url'],
288
+ )
289
+ else:
290
+ actor_model = ModelFactory.create(
291
+ model_platform=actor_config['model_platform'],
292
+ model_type=actor_config['model_type'],
293
+ model_config_dict=actor_config['model_config']
294
+ )
295
+ if args.model_name_v.startswith('vllm_qwen'):
296
+ critic_model = ModelFactory.create(
297
+ model_platform=critic_config['model_platform'],
298
+ model_type=critic_config['model_type'],
299
+ model_config_dict=critic_config['model_config'],
300
+ url=critic_config['url'],
301
+ )
302
+ else:
303
+ critic_model = ModelFactory.create(
304
+ model_platform=critic_config['model_platform'],
305
+ model_type=critic_config['model_type'],
306
+ model_config_dict=critic_config['model_config']
307
+ )
308
+
309
+ actor_agent = ChatAgent(system_message=content_config['system_prompt'], model=actor_model, message_window_size=30)
310
+ critic_agent = ChatAgent(system_message=critic_content_config['system_prompt'], model=critic_model, message_window_size=10)
311
+
312
+ prompt = template.render(**jinja_args)
313
+ actor_agent.reset()
314
+ response = actor_agent.step(prompt)
315
+ t_in, t_out = account_token(response)
316
+ local_t_in += t_in
317
+ local_t_out += t_out
318
+
319
+ result_json = get_json_from_response(response.msgs[0].content)
320
+
321
+ max_attempts = 5
322
+ num_attempts = 0
323
+ old_result_json = copy.deepcopy(result_json)
324
+
325
+ # Length control loop
326
+ while args.estimate_chars:
327
+ num_attempts += 1
328
+ if num_attempts > max_attempts:
329
+ result_json = old_result_json
330
+ break
331
+ try:
332
+ total_bullet_length = 0
333
+ for j in range(num_textboxes):
334
+ bullet_content_key = f'textbox{j + 1}'
335
+ total_bullet_length += compute_bullet_length(result_json[bullet_content_key])
336
+ except Exception:
337
+ result_json = old_result_json
338
+ break
339
+
340
+ if total_bullet_length > total_expected_length:
341
+ percentage_to_shrink = int((total_bullet_length - total_expected_length) / total_bullet_length * 100)
342
+ percentage_to_shrink = min(90, percentage_to_shrink + 10)
343
+ old_result_json = copy.deepcopy(result_json)
344
+ response = actor_agent.step('Too long, please shorten the bullet points by ' + str(percentage_to_shrink) + '%.')
345
+ t_in, t_out = account_token(response)
346
+ local_t_in += t_in
347
+ local_t_out += t_out
348
+ result_json = get_json_from_response(response.msgs[0].content)
349
+ else:
350
+ break
351
+
352
+ critic_prompt = critic_template.render()
353
+ bullet_contents = ['textbox1'] + (['textbox2'] if num_textboxes == 2 else [])
354
+
355
+ # Visual overflow/blank detection & correction
356
+ for j, text_arrangement in enumerate(target_textboxes[:num_textboxes]):
357
+ bullet_content = bullet_contents[j]
358
+ curr_round = 0
359
+ while True:
360
+ if args.ablation_no_commenter:
361
+ break
362
+ curr_round += 1
363
+ img = render_textbox(text_arrangement, result_json[bullet_content], local_tmp_dir)
364
+ if args.model_name_v.startswith('vllm_qwen') or args.ablation_no_example:
365
+ critic_msg = BaseMessage.make_user_message(
366
+ role_name="User",
367
+ content=critic_prompt,
368
+ image_list=[img],
369
+ )
370
+ else:
371
+ critic_msg = BaseMessage.make_user_message(
372
+ role_name="User",
373
+ content=critic_prompt,
374
+ image_list=[Image.open(neg_img_path), Image.open(pos_img_path), img],
375
+ )
376
+
377
+ critic_agent.reset()
378
+ response = critic_agent.step(critic_msg)
379
+ v_in, v_out = account_token(response)
380
+ local_v_in += v_in
381
+ local_v_out += v_out
382
+
383
+ decision = response.msgs[0].content.lower()
384
+ if decision in ['1', '1.', '"1"', "'1'"]:
385
+ if curr_round > 10:
386
+ print(f'Section {i}: Too many rounds of modification, breaking...')
387
+ break
388
+ if agent_modify:
389
+ print(f'Section {i}: Text overflow detected, modifying...')
390
+ modify_message = f'{bullet_content} is too long, please shorten that part, other content should stay the same. Return the entire modified JSON.'
391
+ response = actor_agent.step(modify_message)
392
+ t_in, t_out = account_token(response)
393
+ local_t_in += t_in
394
+ local_t_out += t_out
395
+ result_json = get_json_from_response(response.msgs[0].content)
396
+ else:
397
+ # naive truncate
398
+ result_json[bullet_content] = result_json[bullet_content][:-1]
399
+ continue
400
+ elif decision in ['2', '2.', '"2"', "'2'"]:
401
+ if args.no_blank_detection:
402
+ print(f'Section {i}: No blank space detection, skipping...')
403
+ break
404
+ if curr_round > 10:
405
+ print(f'Section {i}: Too many rounds of modification, breaking...')
406
+ break
407
+ print(f'Section {i}: Too much blank space detected, modifying...')
408
+ modify_message = f'{bullet_content} is too short, please add one more bullet point, other content should stay the same. Return the entire modified JSON.'
409
+ response = actor_agent.step(modify_message)
410
+ t_in, t_out = account_token(response)
411
+ local_t_in += t_in
412
+ local_t_out += t_out
413
+ result_json = get_json_from_response(response.msgs[0].content)
414
+ else:
415
+ break
416
+
417
+ # Clean up temp dir
418
+ if local_tmp_dir:
419
+ try:
420
+ print(f'Section {i}: Cleaning up temp dir {local_tmp_dir}')
421
+ shutil.rmtree(local_tmp_dir)
422
+ except Exception as e:
423
+ print(f"Error cleaning up temp dir {local_tmp_dir}: {e}")
424
+ return i, result_json, local_t_in, local_t_out, local_v_in, local_v_out
425
+
426
+ # ----------------------- Parallel execution -----------------------
427
+ max_workers = getattr(args, 'max_workers', 4)
428
+ results = {}
429
+ lock = threading.Lock()
430
+
431
+ with ThreadPoolExecutor(max_workers=max_workers) as ex:
432
+ futures = {
433
+ ex.submit(_process_section, i): i
434
+ for i in range(1, len(raw_content['sections']))
435
+ }
436
+ for fut in as_completed(futures):
437
+ i, rjson, t_in, t_out, v_in, v_out = fut.result()
438
+ with lock:
439
+ results[i] = rjson
440
+ total_input_token_t += t_in
441
+ total_output_token_t += t_out
442
+ total_input_token_v += v_in
443
+ total_output_token_v += v_out
444
+
445
+ # ----------------------- Title generation (sequential) -----------------------
446
+ title_json, title_input_token, title_output_token = gen_poster_title_content(args, actor_config)
447
+ total_input_token_t += title_input_token
448
+ total_output_token_t += title_output_token
449
+
450
+ # ----------------------- Assemble & save -----------------------
451
+ bullet_point_content = [title_json]
452
+ for idx in range(1, len(raw_content['sections'])):
453
+ bullet_point_content.append(results[idx])
454
+
455
+ json.dump(
456
+ bullet_point_content,
457
+ open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_bullet_point_content_{args.index}.json', 'w'),
458
+ indent=2
459
+ )
460
+
461
+ return total_input_token_t, total_output_token_t, total_input_token_v, total_output_token_v
462
+
463
+ def gen_poster_content(args, actor_config):
464
+ total_input_token, total_output_token = 0, 0
465
+ raw_content = json.load(open(f'contents/{args.model_name}_{args.poster_name}_raw_content.json', 'r'))
466
+ agent_name = 'poster_content_agent'
467
+
468
+ with open(f"utils/prompt_templates/{agent_name}.yaml", "r") as f:
469
+ content_config = yaml.safe_load(f)
470
+
471
+ actor_model = ModelFactory.create(
472
+ model_platform=actor_config['model_platform'],
473
+ model_type=actor_config['model_type'],
474
+ model_config_dict=actor_config['model_config']
475
+ )
476
+
477
+ actor_sys_msg = content_config['system_prompt']
478
+
479
+ def create_actor_agent():
480
+ actor_agent = ChatAgent(
481
+ system_message=actor_sys_msg,
482
+ model=actor_model,
483
+ message_window_size=10
484
+ )
485
+ return actor_agent
486
+
487
+ outline = json.load(open(f'outlines/{args.model_name}_{args.poster_name}_outline_{args.index}.json', 'r'))
488
+ raw_outline = json.loads(json.dumps(outline))
489
+ outline_estimate_num_chars(outline)
490
+ outline = remove_hierarchy_and_id(outline)
491
+
492
+ sections = list(outline.keys())
493
+ sections = [s for s in sections if s != 'meta']
494
+
495
+ jinja_env = Environment(undefined=StrictUndefined)
496
+
497
+ template = jinja_env.from_string(content_config["template"])
498
+
499
+ poster_content = {}
500
+
501
+ poster_content, total_input_token, total_output_token = gen_content_parallel_process_sections(
502
+ sections,
503
+ outline,
504
+ raw_content,
505
+ raw_outline,
506
+ template,
507
+ create_actor_agent,
508
+ MAX_ATTEMPT=5
509
+ )
510
+
511
+ json.dump(poster_content, open(f'contents/{args.model_name}_{args.poster_name}_poster_content_{args.index}.json', 'w'), indent=2)
512
+ return total_input_token, total_output_token
513
+
514
+ if __name__ == '__main__':
515
+ parser = argparse.ArgumentParser()
516
+ parser.add_argument('--poster_name', type=str, default=None)
517
+ parser.add_argument('--model_name', type=str, default='4o')
518
+ parser.add_argument('--poster_path', type=str, required=True)
519
+ parser.add_argument('--index', type=int, default=0)
520
+ parser.add_argument('--max_retry', type=int, default=3)
521
+ args = parser.parse_args()
522
+
523
+ actor_config = get_agent_config(args.model_name)
524
+ if args.poster_name is None:
525
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
526
+
527
+ input_token, output_token = gen_poster_content(args, actor_config)
528
+
529
+ print(f'Token consumption: {input_token} -> {output_token}')
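The length-control loop inside _process_section (shorten the bullets until they fit the panel's character budget) is the part most worth understanding before tuning --estimate_chars. A minimal sketch of that loop, assuming ask_model wraps actor_agent.step plus get_json_from_response and measure returns the rendered character count — both are stand-ins, not the module's actual helpers:

def fit_to_budget(ask_model, measure, draft, budget, max_attempts=5):
    """Ask the model to shorten `draft` until `measure(draft)` fits within `budget` characters."""
    previous = draft
    for _ in range(max_attempts):
        length = measure(draft)
        if length <= budget:
            return draft
        # Same heuristic as in gen_bullet_point_content: shrink by the overflow
        # percentage plus a 10% margin, capped at 90%.
        shrink_pct = min(90, int((length - budget) / length * 100) + 10)
        previous = draft
        draft = ask_model(f"Too long, please shorten the bullet points by {shrink_pct}%.")
    return previous  # after max_attempts, fall back to the last draft that was measured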
Paper2Poster/PosterAgent/gen_pptx_code.py ADDED
@@ -0,0 +1,249 @@
1
+ import re
2
+ import json
3
+
4
+ def sanitize_for_var(name):
5
+ # Convert any character that is not alphanumeric or underscore into underscore.
6
+ return re.sub(r'[^0-9a-zA-Z_]+', '_', name)
7
+
8
+ def initialize_poster_code(width, height, slide_object_name, presentation_object_name, utils_functions):
9
+ code = utils_functions
10
+ code += fr'''
11
+ # Poster: {presentation_object_name}
12
+ {presentation_object_name} = create_poster(width_inch={width}, height_inch={height})
13
+
14
+ # Slide: {slide_object_name}
15
+ {slide_object_name} = add_blank_slide({presentation_object_name})
16
+ '''
17
+
18
+ return code
19
+
20
+ def save_poster_code(output_file, utils_functions, presentation_object_name):
21
+ code = utils_functions
22
+ code += fr'''
23
+ # Save the presentation
24
+ save_presentation({presentation_object_name}, file_name="{output_file}")
25
+ '''
26
+ return code
27
+
28
+ def generate_panel_code(panel_dict, utils_functions, slide_object_name, visible=False, theme=None):
29
+ code = utils_functions
30
+ raw_name = panel_dict["panel_name"]
31
+ var_name = 'var_' + sanitize_for_var(raw_name)
32
+
33
+ code += fr'''
34
+ # Panel: {raw_name}
35
+ {var_name} = add_textbox(
36
+ {slide_object_name},
37
+ '{var_name}',
38
+ {panel_dict['x']},
39
+ {panel_dict['y']},
40
+ {panel_dict['width']},
41
+ {panel_dict['height']},
42
+ text="",
43
+ word_wrap=True,
44
+ font_size=40,
45
+ bold=False,
46
+ italic=False,
47
+ alignment="left",
48
+ fill_color=None,
49
+ font_name="Arial"
50
+ )
51
+ '''
52
+
53
+ if visible:
54
+ if theme is None:
55
+ code += fr'''
56
+ # Make border visible
57
+ style_shape_border({var_name}, color=(0, 0, 0), thickness=5, line_style="solid")
58
+ '''
59
+ else:
60
+ code += fr'''
61
+ # Make border visible
62
+ style_shape_border({var_name}, color={theme['color']}, thickness={theme['thickness']}, line_style="{theme['line_style']}")
63
+ '''
64
+
65
+ return code
66
+
67
+ def generate_textbox_code(
68
+ text_dict,
69
+ utils_functions,
70
+ slide_object_name,
71
+ visible=False,
72
+ content=None,
73
+ theme=None,
74
+ tmp_dir='tmp',
75
+ is_title=False,
76
+ ):
77
+ code = utils_functions
78
+ raw_name = text_dict["textbox_name"]
79
+ var_name = sanitize_for_var(raw_name)
80
+
81
+ code += fr'''
82
+ # Textbox: {raw_name}
83
+ {var_name} = add_textbox(
84
+ {slide_object_name},
85
+ '{var_name}',
86
+ {text_dict['x']},
87
+ {text_dict['y']},
88
+ {text_dict['width']},
89
+ {text_dict['height']},
90
+ text="",
91
+ word_wrap=True,
92
+ font_size=40,
93
+ bold=False,
94
+ italic=False,
95
+ alignment="left",
96
+ fill_color=None,
97
+ font_name="Arial"
98
+ )
99
+ '''
100
+ if visible:
101
+ # Extract textbox_theme from full theme if needed
102
+ textbox_border_theme = None
103
+ if theme is not None and isinstance(theme, dict):
104
+ textbox_border_theme = theme.get('textbox_theme')
105
+
106
+ if textbox_border_theme is None:
107
+ code += fr'''
108
+ # Make border visible
109
+ style_shape_border({var_name}, color=(255, 0, 0), thickness=5, line_style="solid")
110
+ '''
111
+ else:
112
+ code += fr'''
113
+ # Make border visible
114
+ style_shape_border({var_name}, color={textbox_border_theme['color']}, thickness={textbox_border_theme['thickness']}, line_style="{textbox_border_theme['line_style']}")
115
+ '''
116
+
117
+ if content is not None:
118
+ tmp_name = f'{tmp_dir}/{var_name}_content.json'
119
+ json.dump(content, open(tmp_name, 'w'), indent=4)
120
+
121
+ # Determine vertical alignment
122
+ vertical_anchor = None
123
+ if is_title and theme is not None and 'section_title_vertical_align' in theme:
124
+ vertical_anchor = theme['section_title_vertical_align']
125
+
126
+ if vertical_anchor:
127
+ code += fr'''
128
+ fill_textframe({var_name}, json.load(open('{tmp_name}', 'r')), vertical_anchor="{vertical_anchor}")
129
+ '''
130
+ else:
131
+ code += fr'''
132
+ fill_textframe({var_name}, json.load(open('{tmp_name}', 'r')))
133
+ '''
134
+
135
+ return code
136
+
137
+ def generate_figure_code(figure_dict, utils_functions, slide_object_name, img_path, visible=False, theme=None):
138
+ code = utils_functions
139
+ raw_name = figure_dict["figure_name"]
140
+ var_name = sanitize_for_var(raw_name)
141
+
142
+ code += fr'''
143
+ # Figure: {raw_name}
144
+ {var_name} = add_image(
145
+ {slide_object_name},
146
+ '{var_name}',
147
+ {figure_dict['x']},
148
+ {figure_dict['y']},
149
+ {figure_dict['width']},
150
+ {figure_dict['height']},
151
+ image_path="{img_path}"
152
+ )
153
+ '''
154
+
155
+ if visible:
156
+ if theme is None:
157
+ code += fr'''
158
+ # Make border visible
159
+ style_shape_border({var_name}, color=(0, 0, 255), thickness=5, line_style="long_dash_dot")
160
+ '''
161
+ else:
162
+ code += fr'''
163
+ # Make border visible
164
+ style_shape_border({var_name}, color={theme['color']}, thickness={theme['thickness']}, line_style="{theme['line_style']}")
165
+ '''
166
+
167
+ return code
168
+
169
+ def generate_poster_code(
170
+ panel_arrangement_list,
171
+ text_arrangement_list,
172
+ figure_arrangement_list,
173
+ presentation_object_name,
174
+ slide_object_name,
175
+ utils_functions,
176
+ slide_width,
177
+ slide_height,
178
+ img_path,
179
+ save_path,
180
+ visible=False,
181
+ content=None,
182
+ check_overflow=False,
183
+ theme=None,
184
+ tmp_dir='tmp',
185
+ ):
186
+ code = ''
187
+ code += initialize_poster_code(slide_width, slide_height, slide_object_name, presentation_object_name, utils_functions)
188
+
189
+ if theme is None:
190
+ panel_visible = visible
191
+ textbox_visible = visible
192
+ figure_visible = visible
193
+
194
+ panel_theme, textbox_theme, figure_theme = None, None, None
195
+ else:
196
+ panel_visible = theme['panel_visible']
197
+ textbox_visible = theme['textbox_visible']
198
+ figure_visible = theme['figure_visible']
199
+ panel_theme = theme['panel_theme']
200
+ textbox_theme = theme['textbox_theme']
201
+ figure_theme = theme['figure_theme']
202
+
203
+ for p in panel_arrangement_list:
204
+ code += generate_panel_code(p, '', slide_object_name, panel_visible, panel_theme)
205
+
206
+ if check_overflow:
207
+ t = text_arrangement_list[0]
208
+ # Pass full theme for consistency
209
+ code += generate_textbox_code(t, '', slide_object_name, textbox_visible, content, theme, tmp_dir, is_title=False)
210
+ else:
211
+ all_content = []
212
+ title_indices = set() # Track which indices are section titles
213
+ if content is not None:
214
+ idx = 0
215
+ for section_content in content:
216
+ if 'title' in section_content:
217
+ all_content.append(section_content['title'])
218
+ title_indices.add(idx) # Mark this index as a title
219
+ idx += 1
220
+ if len(section_content) == 2:
221
+ all_content.append(section_content['textbox1'])
222
+ idx += 1
223
+ elif len(section_content) == 3:
224
+ all_content.append(section_content['textbox1'])
225
+ all_content.append(section_content['textbox2'])
226
+ idx += 2
227
+ else:
228
+ raise ValueError(f"Unexpected content length: {len(section_content)}")
229
+
230
+ for i in range(len(text_arrangement_list)):
231
+ t = text_arrangement_list[i]
232
+ if content is not None:
233
+ textbox_content = all_content[i]
234
+ is_title = i in title_indices
235
+ else:
236
+ textbox_content = None
237
+ is_title = False
238
+ # Pass full theme (not textbox_theme) so vertical alignment config is available
239
+ code += generate_textbox_code(t, '', slide_object_name, textbox_visible, textbox_content, theme, tmp_dir, is_title=is_title)
240
+
241
+ for f in figure_arrangement_list:
242
+ if img_path is None:
243
+ code += generate_figure_code(f, '', slide_object_name, f['figure_path'], figure_visible, figure_theme)
244
+ else:
245
+ code += generate_figure_code(f, '', slide_object_name, img_path, figure_visible, figure_theme)
246
+
247
+ code += save_poster_code(save_path, '', presentation_object_name)
248
+
249
+ return code
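generate_poster_code does not build the .pptx directly; it emits a Python script (as a string) that calls helpers such as create_poster, add_textbox and save_presentation, and the pipeline then executes that script. A hypothetical usage sketch — utils_functions and run_code come from utils.wei_utils as imported in new_pipeline.py, and the coordinate values are placeholder inches:

from utils.wei_utils import utils_functions, run_code
from PosterAgent.gen_pptx_code import generate_poster_code

panel = {"panel_name": "Introduction", "x": 1, "y": 1, "width": 20, "height": 10}
textbox = {"textbox_name": "Introduction_text", "x": 1.5, "y": 1.5, "width": 19, "height": 9}

code = generate_poster_code(
    [panel], [textbox], [],                        # panels, textboxes, no figures
    presentation_object_name="poster_presentation",
    slide_object_name="poster_slide",
    utils_functions=utils_functions,               # helper source prepended to the generated script
    slide_width=48, slide_height=36,
    img_path=None,
    save_path="tmp/poster.pptx",
    visible=True,                                  # draw borders so the layout is easy to inspect
)
output, err = run_code(code)                       # executing the script writes tmp/poster.pptx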
Paper2Poster/PosterAgent/new_pipeline.py ADDED
@@ -0,0 +1,547 @@
1
+ import os
2
+ print("Initializing...")
3
+ from PosterAgent.parse_raw import parse_raw, gen_image_and_table
4
+ from PosterAgent.gen_outline_layout import filter_image_table, gen_outline_layout_v2
5
+ from utils.wei_utils import get_agent_config, utils_functions, run_code, scale_to_target_area, char_capacity
6
+ from PosterAgent.tree_split_layout import main_train, main_inference, get_arrangments_in_inches, split_textbox, to_inches
7
+ # from PosterAgent.gen_pptx_code import generate_poster_code
8
+ # from utils.src.utils import ppt_to_images
9
+ # from PosterAgent.gen_poster_content import gen_bullet_point_content
10
+ from utils.ablation_utils import no_tree_get_layout
11
+
12
+ # Import refactored utilities
13
+ from utils.logo_utils import LogoManager, add_logos_to_poster_code
14
+ # from utils.config_utils import (
15
+ # load_poster_yaml_config, extract_font_sizes, extract_colors,
16
+ # extract_vertical_alignment, extract_section_title_symbol, normalize_config_values
17
+ # )
18
+ # from utils.style_utils import apply_all_styles
19
+ # from utils.theme_utils import get_default_theme, create_theme_with_alignment, resolve_colors
20
+
21
+ # from PosterAgent.gen_beamer_code import (
22
+ # generate_beamer_poster_code,
23
+ # save_beamer_code,
24
+ # compile_beamer_to_pdf,
25
+ # convert_pptx_layout_to_beamer
26
+ # )
27
+
28
+ import argparse
29
+ import json
30
+
31
+ import time
32
+ import shutil
33
+
34
+ units_per_inch = 25
35
+
36
+ if __name__ == '__main__':
37
+
38
+ parser = argparse.ArgumentParser(description='Poster Generation Pipeline with Logo Support')
39
+ parser.add_argument('--poster_path', type=str)
40
+ parser.add_argument('--model_name_t', type=str, default='4o')
41
+ parser.add_argument('--model_name_v', type=str, default='4o')
42
+ parser.add_argument('--index', type=int, default=0)
43
+ parser.add_argument('--poster_name', type=str, default=None)
44
+ parser.add_argument('--tmp_dir', type=str, default='tmp')
45
+ parser.add_argument('--estimate_chars', action='store_true')
46
+ parser.add_argument('--max_workers', type=int, default=10)
47
+ parser.add_argument('--poster_width_inches', type=int, default=None)
48
+ parser.add_argument('--poster_height_inches', type=int, default=None)
49
+ parser.add_argument('--no_blank_detection', action='store_true', help='When overflow is severe, try this option.')
50
+ parser.add_argument('--ablation_no_tree_layout', action='store_true', help='Ablation study: no tree layout')
51
+ parser.add_argument('--ablation_no_commenter', action='store_true', help='Ablation study: no commenter')
52
+ parser.add_argument('--ablation_no_example', action='store_true', help='Ablation study: no example')
53
+
54
+ # Logo-related arguments
55
+ parser.add_argument('--conference_venue', type=str, default=None,
56
+ help='Conference name for automatic logo search (e.g., "NeurIPS", "CVPR")')
57
+ parser.add_argument('--institution_logo_path', type=str, default=None,
58
+ help='Custom path to institution logo (auto-searches from paper metadata if not provided)')
59
+ parser.add_argument('--conference_logo_path', type=str, default=None,
60
+ help='Custom path to conference logo (auto-searches if venue specified)')
61
+ parser.add_argument('--use_google_search', action='store_true',
62
+ help='Use Google Custom Search API for logo search (requires API keys in .env)')
63
+
64
+ args = parser.parse_args()
65
+
66
+ start_time = time.time()
67
+
68
+ os.makedirs(args.tmp_dir, exist_ok=True)
69
+
70
+ detail_log = {}
71
+
72
+ agent_config_t = get_agent_config(args.model_name_t)
73
+ agent_config_v = get_agent_config(args.model_name_v)
74
+ poster_name = args.poster_path.split('/')[-2].replace(' ', '_')
75
+ if args.poster_name is None:
76
+ args.poster_name = poster_name
77
+ else:
78
+ poster_name = args.poster_name
79
+ meta_json_path = args.poster_path.replace('paper.pdf', 'meta.json')
80
+ if args.poster_width_inches is not None and args.poster_height_inches is not None:
81
+ poster_width = args.poster_width_inches * units_per_inch
82
+ poster_height = args.poster_height_inches * units_per_inch
83
+ elif os.path.exists(meta_json_path):
84
+ meta_json = json.load(open(meta_json_path, 'r'))
85
+ poster_width = meta_json['width']
86
+ poster_height = meta_json['height']
87
+ else:
88
+ poster_width = 48 * units_per_inch
89
+ poster_height = 36 * units_per_inch
90
+
91
+ poster_width, poster_height = scale_to_target_area(poster_width, poster_height)
92
+ poster_width_inches = to_inches(poster_width, units_per_inch)
93
+ poster_height_inches = to_inches(poster_height, units_per_inch)
94
+
95
+ if poster_width_inches > 56 or poster_height_inches > 56:
96
+ # Work out which side is longer, then compute a single scale factor
97
+ if poster_width_inches >= poster_height_inches:
98
+ scale_factor = 56 / poster_width_inches
99
+ else:
100
+ scale_factor = 56 / poster_height_inches
101
+
102
+ poster_width_inches *= scale_factor
103
+ poster_height_inches *= scale_factor
104
+
105
+ # convert back to internal units
106
+ poster_width = poster_width_inches * units_per_inch
107
+ poster_height = poster_height_inches * units_per_inch
108
+
109
+ print(f'Poster size: {poster_width_inches} x {poster_height_inches} inches')
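For example (setting aside the earlier scale_to_target_area step), a requested 72 x 48 inch poster has width as its longer side, so scale_factor = 56/72 ≈ 0.78; the poster becomes 56 x ~37.3 inches, which converts back to 1400 x ~933 internal units at 25 units per inch.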
110
+
111
+ total_input_tokens_t, total_output_tokens_t = 0, 0
112
+ total_input_tokens_v, total_output_tokens_v = 0, 0
113
+
114
+ # Step 1: Parse the raw poster
115
+ input_token, output_token, raw_result = parse_raw(args, agent_config_t, version=2)
116
+ total_input_tokens_t += input_token
117
+ total_output_tokens_t += output_token
118
+
119
+ _, _, images, tables = gen_image_and_table(args, raw_result)
120
+
121
+ print(f'Parsing token consumption: {input_token} -> {output_token}')
122
+
123
+ parser_time_taken = time.time() - start_time
124
+ print(f'Parser time: {parser_time_taken:.2f} seconds')
125
+ detail_log['parser_time'] = parser_time_taken
126
+
127
+ parser_time = time.time()
128
+
129
+ detail_log['parser_in_t'] = input_token
130
+ detail_log['parser_out_t'] = output_token
131
+
132
+ # Initialize LogoManager
133
+ logo_manager = LogoManager()
134
+ institution_logo_path = args.institution_logo_path
135
+ conference_logo_path = args.conference_logo_path
136
+
137
+ # Auto-detect institution from paper if not provided
138
+ # Now using the raw_result directly instead of reading from file
139
+ if not institution_logo_path:
140
+ print("\n" + "="*60)
141
+ print("🔍 AUTO-DETECTING INSTITUTION FROM PAPER")
142
+ print("="*60)
143
+
144
+ # Use the raw_result we already have from the parser
145
+ if raw_result:
146
+ print(f"📄 Using parsed paper content")
147
+ # Extract text content from the ConversionResult object
148
+ try:
149
+ paper_text = raw_result.document.export_to_markdown()
150
+ except Exception:
151
+ # Fallback: try to get text content in another way
152
+ paper_text = str(raw_result)
153
+
154
+ print("🔎 Searching for FIRST AUTHOR's institution...")
155
+ first_author_inst = logo_manager.extract_first_author_institution(paper_text)
156
+
157
+ if first_author_inst:
158
+ print(f"\n✅ FIRST AUTHOR INSTITUTION: {first_author_inst}")
159
+ print(f"🔍 Searching for logo: {first_author_inst}")
160
+
161
+ inst_logo_path = logo_manager.get_logo_path(first_author_inst, category="institute", use_google=args.use_google_search)
162
+ if inst_logo_path:
163
+ institution_logo_path = str(inst_logo_path)
164
+ print(f"✅ Institution logo found: {institution_logo_path}")
165
+ else:
166
+ print(f"❌ Could not find/download logo for: {first_author_inst}")
167
+ else:
168
+ print("❌ No first author institution detected or matched with available logos")
169
+ else:
170
+ print("❌ No parsed content available")
171
+ print("="*60 + "\n")
172
+
173
+ # Handle conference logo
174
+ if args.conference_venue and not conference_logo_path:
175
+ print("\n" + "="*60)
176
+ print("🏛️ SEARCHING FOR CONFERENCE LOGO")
177
+ print("="*60)
178
+ print(f"📍 Conference: {args.conference_venue}")
179
+ print(f"🔍 Searching for logo...")
180
+
181
+ conf_logo_path = logo_manager.get_logo_path(args.conference_venue, category="conference", use_google=args.use_google_search)
182
+ if conf_logo_path:
183
+ conference_logo_path = str(conf_logo_path)
184
+ print(f"✅ Conference logo found: {conference_logo_path}")
185
+ else:
186
+ print(f"❌ Could not find/download logo for: {args.conference_venue}")
187
+ # Note: Web search is now handled inside get_logo_path automatically
188
+ print("="*60 + "\n")
189
+
190
+ # Step 2: Filter unnecessary images and tables
191
+ input_token, output_token = filter_image_table(args, agent_config_t)
192
+ total_input_tokens_t += input_token
193
+ total_output_tokens_t += output_token
194
+ print(f'Filter figures token consumption: {input_token} -> {output_token}')
195
+
196
+ filter_time_taken = time.time() - parser_time
197
+ print(f'Filter time: {filter_time_taken:.2f} seconds')
198
+ detail_log['filter_time'] = filter_time_taken
199
+
200
+ filter_time = time.time()
201
+
202
+ detail_log['filter_in_t'] = input_token
203
+ detail_log['filter_out_t'] = output_token
204
+
205
+ # Step 3: Generate outline
206
+ input_token, output_token, panels, figures = gen_outline_layout_v2(args, agent_config_t)
207
+ total_input_tokens_t += input_token
208
+ total_output_tokens_t += output_token
209
+ print(f'Outline token consumption: {input_token} -> {output_token}')
210
+
211
+ outline_time_taken = time.time() - filter_time
212
+ print(f'Outline time: {outline_time_taken:.2f} seconds')
213
+ detail_log['outline_time'] = outline_time_taken
214
+
215
+ outline_time = time.time()
216
+
217
+ detail_log['outline_in_t'] = input_token
218
+ detail_log['outline_out_t'] = output_token
219
+
220
+ if args.ablation_no_tree_layout:
221
+ panel_arrangement, figure_arrangement, text_arrangement, input_token, output_token = no_tree_get_layout(
222
+ poster_width,
223
+ poster_height,
224
+ panels,
225
+ figures,
226
+ agent_config_t
227
+ )
228
+ total_input_tokens_t += input_token
229
+ total_output_tokens_t += output_token
230
+ print(f'No tree layout token consumption: {input_token} -> {output_token}')
231
+ detail_log['no_tree_layout_in_t'] = input_token
232
+ detail_log['no_tree_layout_out_t'] = output_token
233
+ else:
234
+
235
+ # Step 4: Learn and generate layout
236
+ panel_model_params, figure_model_params = main_train()
237
+
238
+ panel_arrangement, figure_arrangement, text_arrangement = main_inference(
239
+ panels,
240
+ panel_model_params,
241
+ figure_model_params,
242
+ poster_width,
243
+ poster_height,
244
+ shrink_margin=3
245
+ )
246
+
247
+ text_arrangement_title = text_arrangement[0]
248
+ text_arrangement = text_arrangement[1:]
249
+ # Split the title textbox into two parts
250
+ text_arrangement_title_top, text_arrangement_title_bottom = split_textbox(
251
+ text_arrangement_title,
252
+ 0.8
253
+ )
254
+ # Add the split textboxes back to the list
255
+ text_arrangement = [text_arrangement_title_top, text_arrangement_title_bottom] + text_arrangement
256
+
257
+ for i in range(len(figure_arrangement)):
258
+ panel_id = figure_arrangement[i]['panel_id']
259
+ panel_section_name = panels[panel_id]['section_name']
260
+ figure_info = figures[panel_section_name]
261
+ if 'image' in figure_info:
262
+ figure_id = figure_info['image']
263
+ if figure_id not in images:
264
+ figure_path = images[str(figure_id)]['image_path']
265
+ else:
266
+ figure_path = images[figure_id]['image_path']
267
+ elif 'table' in figure_info:
268
+ figure_id = figure_info['table']
269
+ if figure_id not in tables:
270
+ figure_path = tables[str(figure_id)]['table_path']
271
+ else:
272
+ figure_path = tables[figure_id]['table_path']
273
+
274
+ figure_arrangement[i]['figure_path'] = figure_path
275
+
276
+ for text_arrangement_item in text_arrangement:
277
+ num_chars = char_capacity(
278
+ bbox=(text_arrangement_item['x'], text_arrangement_item['y'], text_arrangement_item['height'], text_arrangement_item['width'])
279
+ )
280
+ text_arrangement_item['num_chars'] = num_chars
281
+
282
+
283
+ width_inch, height_inch, panel_arrangement_inches, figure_arrangement_inches, text_arrangement_inches = get_arrangments_in_inches(
284
+ poster_width, poster_height, panel_arrangement, figure_arrangement, text_arrangement, 25
285
+ )
286
+
287
+ # Save to file
288
+ tree_split_results = {
289
+ 'poster_width': poster_width,
290
+ 'poster_height': poster_height,
291
+ 'poster_width_inches': width_inch,
292
+ 'poster_height_inches': height_inch,
293
+ 'panels': panels,
294
+ 'panel_arrangement': panel_arrangement,
295
+ 'figure_arrangement': figure_arrangement,
296
+ 'text_arrangement': text_arrangement,
297
+ 'panel_arrangement_inches': panel_arrangement_inches,
298
+ 'figure_arrangement_inches': figure_arrangement_inches,
299
+ 'text_arrangement_inches': text_arrangement_inches,
300
+ }
301
+ os.makedirs('tree_splits', exist_ok=True)
302
+ with open(f'tree_splits/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_tree_split_{args.index}.json', 'w') as f:
303
+ json.dump(tree_split_results, f, indent=4)
304
+
305
+ layout_time_taken = time.time() - outline_time
306
+ print(f'Layout time: {layout_time_taken:.2f} seconds')
307
+ detail_log['layout_time'] = layout_time_taken
308
+
309
+ layout_time = time.time()
310
+
311
+ # # === Configuration Loading ===
312
+ # print("\n📋 Loading configuration from YAML files...", flush=True)
313
+ # yaml_cfg = load_poster_yaml_config(args.poster_path)
314
+
315
+ # # Extract configuration values
316
+ # bullet_fs, title_fs, poster_title_fs, poster_author_fs = extract_font_sizes(yaml_cfg)
317
+ # title_text_color, title_fill_color, main_text_color, main_text_fill_color = extract_colors(yaml_cfg)
318
+ # section_title_vertical_align = extract_vertical_alignment(yaml_cfg)
319
+ # section_title_symbol = extract_section_title_symbol(yaml_cfg)
320
+
321
+ # # Normalize configuration values
322
+ # bullet_fs, title_fs, poster_title_fs, poster_author_fs, \
323
+ # title_text_color, title_fill_color, main_text_color, main_text_fill_color = normalize_config_values(
324
+ # bullet_fs, title_fs, poster_title_fs, poster_author_fs,
325
+ # title_text_color, title_fill_color, main_text_color, main_text_fill_color
326
+ # )
327
+
328
+ # # Store configuration in args
329
+ # setattr(args, 'bullet_font_size', bullet_fs)
330
+ # setattr(args, 'section_title_font_size', title_fs)
331
+ # setattr(args, 'poster_title_font_size', poster_title_fs)
332
+ # setattr(args, 'poster_author_font_size', poster_author_fs)
333
+ # setattr(args, 'title_text_color', title_text_color)
334
+ # setattr(args, 'title_fill_color', title_fill_color)
335
+ # setattr(args, 'main_text_color', main_text_color)
336
+ # setattr(args, 'main_text_fill_color', main_text_fill_color)
337
+ # setattr(args, 'section_title_vertical_align', section_title_vertical_align)
338
+
339
+ # # Step 5: Generate content
340
+ # print(f"\n✍️ Generating poster content (max_workers={args.max_workers})...", flush=True)
341
+ # # --- Step 1: Check for cached content ---
342
+ # content_cache_path = f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_bullet_point_content_{args.index}.json'
343
+
344
+ # if os.path.exists(content_cache_path):
345
+ # print(f"🧩 Cache found: {content_cache_path}")
346
+ # print("⚡ Skipping model generation, loading from cache...")
347
+ # bullet_content = json.load(open(content_cache_path, 'r'))
348
+ # input_token_t = output_token_t = input_token_v = output_token_v = 0
349
+ # else:
350
+ # print("🧠 Running model to generate poster content...")
351
+ # input_token_t, output_token_t, input_token_v, output_token_v = gen_bullet_point_content(
352
+ # args, agent_config_t, agent_config_v, tmp_dir=args.tmp_dir
353
+ # )
354
+ # bullet_content = json.load(open(content_cache_path, 'r'))
355
+
356
+ # input_token_t, output_token_t, input_token_v, output_token_v = gen_bullet_point_content(args, agent_config_t, agent_config_v, tmp_dir=args.tmp_dir)
357
+ # total_input_tokens_t += input_token
358
+ # total_output_tokens_t += output_token
359
+ # total_input_tokens_v += input_token_v
360
+ # total_output_tokens_v += output_token_v
361
+ # print(f'Content generation token consumption T: {input_token_t} -> {output_token_t}')
362
+ # print(f'Content generation token consumption V: {input_token_v} -> {output_token_v}')
363
+
364
+ # content_time_taken = time.time() - layout_time
365
+ # print(f'Content generation time: {content_time_taken:.2f} seconds')
366
+ # detail_log['content_time'] = content_time_taken
367
+
368
+ # content_time = time.time()
369
+
370
+ # bullet_content = json.load(open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_bullet_point_content_{args.index}.json', 'r'))
371
+
372
+ # detail_log['content_in_t'] = input_token_t
373
+ # detail_log['content_out_t'] = output_token_t
374
+ # detail_log['content_in_v'] = input_token_v
375
+ # detail_log['content_out_v'] = output_token_v
376
+
377
+ # # === Style Application ===
378
+ # print("\n🎨 Applying styles and colors...", flush=True)
379
+
380
+ # # Resolve colors with fallbacks
381
+ # final_title_text_color, final_title_fill_color, final_main_text_color, final_main_text_fill_color = resolve_colors(
382
+ # getattr(args, 'title_text_color', None),
383
+ # getattr(args, 'title_fill_color', None),
384
+ # getattr(args, 'main_text_color', None),
385
+ # getattr(args, 'main_text_fill_color', None)
386
+ # )
387
+
388
+ # # Apply all styles in one go
389
+ # bullet_content = apply_all_styles(
390
+ # bullet_content,
391
+ # title_text_color=final_title_text_color,
392
+ # title_fill_color=final_title_fill_color,
393
+ # main_text_color=final_main_text_color,
394
+ # main_text_fill_color=final_main_text_fill_color,
395
+ # section_title_symbol=section_title_symbol,
396
+ # main_text_font_size=bullet_fs
397
+ # )
398
+
399
+ # # === Poster Generation ===
400
+ # # print("\n🎯 Generating PowerPoint code...", flush=True)
401
+
402
+ # # Create theme with alignment
403
+ # base_theme = get_default_theme()
404
+ # theme_with_alignment = create_theme_with_alignment(
405
+ # base_theme,
406
+ # getattr(args, 'section_title_vertical_align', None)
407
+ # )
408
+
409
+ # # poster_code = generate_poster_code(
410
+ # # panel_arrangement_inches,
411
+ # # text_arrangement_inches,
412
+ # # figure_arrangement_inches,
413
+ # # presentation_object_name='poster_presentation',
414
+ # # slide_object_name='poster_slide',
415
+ # # utils_functions=utils_functions,
416
+ # # slide_width=width_inch,
417
+ # # slide_height=height_inch,
418
+ # # img_path=None,
419
+ # # save_path=f'{args.tmp_dir}/poster.pptx',
420
+ # # visible=False,
421
+ # # content=bullet_content,
422
+ # # theme=theme_with_alignment,
423
+ # # tmp_dir=args.tmp_dir,
424
+ # # )
425
+ # print("\n🎯 Generating Beamer poster (LaTeX)...", flush=True)
426
+
427
+ # # --- 1. Extract poster_info ---
428
+ # poster_info = {
429
+ # "title": args.poster_name,
430
+ # "author": "AutoGen",
431
+ # "institute": "Auto-detected Institution"
432
+ # }
433
+ # if isinstance(bullet_content, list) and len(bullet_content) > 0:
434
+ # first_section = bullet_content[0]
435
+ # if isinstance(first_section, dict):
436
+ # if "poster_title" in first_section:
437
+ # poster_info["title"] = first_section["poster_title"]
438
+ # elif "title" in first_section:
439
+ # poster_info["title"] = first_section["title"]
440
+
441
+ # --- 2. Build the Beamer data structures ---
442
+ # layout_data = {
443
+ # "text_arrangement": text_arrangement,
444
+ # "figure_arrangement": figure_arrangement
445
+ # }
446
+ # beamer_data = convert_pptx_layout_to_beamer(layout_data)
447
+
448
+ # Map bullet_content into the Beamer sections
449
+ # for i, section in enumerate(beamer_data["sections"]):
450
+ # if i < len(bullet_content):
451
+ # section_data = bullet_content[i]
452
+ # if isinstance(section_data, dict):
453
+ # section["content"] = section_data.get("textbox1") or section_data.get("title") or json.dumps(section_data)
454
+ # else:
455
+ # section["content"] = str(section_data)
456
+
457
+ # --- 3. Generate the LaTeX file ---
458
+ # poster_info = {k: (str(v) if not isinstance(v, str) else v) for k, v in poster_info.items()}
459
+
460
+ # beamer_code = generate_beamer_poster_code(
461
+ # beamer_data["sections"],
462
+ # beamer_data["figures"],
463
+ # poster_info,
464
+ # width_cm=poster_width_inches * 2.54,
465
+ # height_cm=poster_height_inches * 2.54,
466
+ # theme="Madrid",
467
+ # output_path=f"{args.tmp_dir}/{poster_name}.tex"
468
+ # )
469
+ # save_beamer_code(beamer_code, f"{args.tmp_dir}/{poster_name}.tex")
470
+
471
+
472
+ # --- 4. Compile to PDF ---
473
+ # output_dir = f'<{args.model_name_t}_{args.model_name_v}>_generated_beamer_posters/{args.poster_path.replace("paper.pdf", "")}'
474
+ # compile_beamer_to_pdf(f"{args.tmp_dir}/{poster_name}.tex", output_dir=args.tmp_dir)
475
+ # pdf_path = os.path.join(args.tmp_dir, f"{poster_name}.pdf")
476
+ # os.makedirs(output_dir, exist_ok=True)
477
+ # os.rename(pdf_path, os.path.join(output_dir, f"{poster_name}.pdf"))
478
+
479
+ # print(f"✅ Beamer poster PDF saved to {output_dir}")
480
+ # Add logos to the poster
481
+ # print("\n🖼️ Adding logos to poster...", flush=True)
482
+ # poster_code = add_logos_to_poster_code(
483
+ # poster_code,
484
+ # width_inch,
485
+ # height_inch,
486
+ # institution_logo_path=institution_logo_path,
487
+ # conference_logo_path=conference_logo_path
488
+ # )
489
+
490
+ # output, err = run_code(poster_code)
491
+ # if err is not None:
492
+ # raise RuntimeError(f'Error in generating PowerPoint: {err}')
493
+
494
+ # # Step 8: Create a folder in the output directory
495
+ # output_dir = f'<{args.model_name_t}_{args.model_name_v}>_generated_posters/{args.poster_path.replace("paper.pdf", "")}'
496
+ # os.makedirs(output_dir, exist_ok=True)
497
+
498
+ # # Copy logos to output directory for reference
499
+ # logos_dir = os.path.join(output_dir, 'logos')
500
+ # if institution_logo_path or conference_logo_path:
501
+ # os.makedirs(logos_dir, exist_ok=True)
502
+ # if institution_logo_path and os.path.exists(institution_logo_path):
503
+ # shutil.copy2(institution_logo_path, os.path.join(logos_dir, 'institution_logo' + os.path.splitext(institution_logo_path)[1]))
504
+ # if conference_logo_path and os.path.exists(conference_logo_path):
505
+ # shutil.copy2(conference_logo_path, os.path.join(logos_dir, 'conference_logo' + os.path.splitext(conference_logo_path)[1]))
506
+
507
+ # # Step 9: Move poster.pptx to the output directory
508
+ # pptx_path = os.path.join(output_dir, f'{poster_name}.pptx')
509
+ # os.rename(f'{args.tmp_dir}/poster.pptx', pptx_path)
510
+ # print(f'Poster PowerPoint saved to {pptx_path}')
511
+ # # Step 10: Convert the PowerPoint to images
512
+ # ppt_to_images(pptx_path, output_dir)
513
+ # print(f'Poster images saved to {output_dir}')
514
+
515
+ # end_time = time.time()
516
+ # time_taken = end_time - start_time
517
+
518
+ # render_time_taken = time.time() - content_time
519
+ # print(f'Render time: {render_time_taken:.2f} seconds')
520
+ # detail_log['render_time'] = render_time_taken
521
+
522
+ # # log
523
+ # log_file = os.path.join(output_dir, 'log.json')
524
+ # with open(log_file, 'w') as f:
525
+ # log_data = {
526
+ # 'input_tokens_t': total_input_tokens_t,
527
+ # 'output_tokens_t': total_output_tokens_t,
528
+ # 'input_tokens_v': total_input_tokens_v,
529
+ # 'output_tokens_v': total_output_tokens_v,
530
+ # 'time_taken': time_taken,
531
+ # 'institution_logo': institution_logo_path,
532
+ # 'conference_logo': conference_logo_path,
533
+ # }
534
+ # json.dump(log_data, f, indent=4)
535
+
536
+ # detail_log_file = os.path.join(output_dir, 'detail_log.json')
537
+ # with open(detail_log_file, 'w') as f:
538
+ # json.dump(detail_log, f, indent=4)
539
+
540
+ # print(f'\nTotal time: {time_taken:.2f} seconds')
541
+ # print(f'Total text model tokens: {total_input_tokens_t} -> {total_output_tokens_t}')
542
+ # print(f'Total vision model tokens: {total_input_tokens_v} -> {total_output_tokens_v}')
543
+
544
+ # if institution_logo_path:
545
+ # print(f'Institution logo added: {institution_logo_path}')
546
+ # if conference_logo_path:
547
+ # print(f'Conference logo added: {conference_logo_path}')
Paper2Poster/PosterAgent/parse_raw.py ADDED
@@ -0,0 +1,237 @@
1
+ from dotenv import load_dotenv
2
+ from utils.src.utils import get_json_from_response
3
+ from utils.src.model_utils import parse_pdf
4
+ import json
5
+ import random
6
+
7
+ from camel.models import ModelFactory
8
+ from camel.agents import ChatAgent
9
+ from tenacity import retry, stop_after_attempt
10
+ from docling_core.types.doc import ImageRefMode, PictureItem, TableItem
11
+
12
+ from docling.datamodel.base_models import InputFormat
13
+ from docling.datamodel.pipeline_options import PdfPipelineOptions
14
+ from docling.document_converter import DocumentConverter, PdfFormatOption
15
+
16
+ from pathlib import Path
17
+
18
+ import PIL
19
+
20
+ from marker.models import create_model_dict
21
+
22
+ from utils.wei_utils import *
23
+
24
+ from utils.pptx_utils import *
25
+ from utils.critic_utils import *
26
+ import torch
27
+ from jinja2 import Template
28
+ import re
29
+ import argparse
30
+
31
+ load_dotenv()
32
+ IMAGE_RESOLUTION_SCALE = 5.0
33
+
34
+ pipeline_options = PdfPipelineOptions()
35
+ pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
36
+ pipeline_options.generate_page_images = True
37
+ pipeline_options.generate_picture_images = True
38
+
39
+ doc_converter = DocumentConverter(
40
+ format_options={
41
+ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
42
+ }
43
+ )
44
+
45
+ @retry(stop=stop_after_attempt(5))
46
+ def parse_raw(args, actor_config, version=2):
47
+ raw_source = args.poster_path
48
+ markdown_clean_pattern = re.compile(r"<!--[\s\S]*?-->")
49
+
50
+ raw_result = doc_converter.convert(raw_source)
51
+
52
+ raw_markdown = raw_result.document.export_to_markdown()
53
+ text_content = markdown_clean_pattern.sub("", raw_markdown)
54
+
55
+ if len(text_content) < 500:
56
+ print('\nParsing with docling failed, using marker instead\n')
57
+ parser_model = create_model_dict(device='cuda', dtype=torch.float16)
58
+ text_content, rendered = parse_pdf(raw_source, model_lst=parser_model, save_file=False)
59
+
60
+ if version == 1:
61
+ template = Template(open("utils/prompts/gen_poster_raw_content.txt").read())
62
+ elif version == 2:
63
+ print('Using v2 prompt template')
64
+ template = Template(open("utils/prompts/gen_poster_raw_content_v2.txt").read())
65
+
66
+ if args.model_name_t.startswith('vllm_qwen'):
67
+ actor_model = ModelFactory.create(
68
+ model_platform=actor_config['model_platform'],
69
+ model_type=actor_config['model_type'],
70
+ model_config_dict=actor_config['model_config'],
71
+ url=actor_config['url'],
72
+ )
73
+ else:
74
+ actor_model = ModelFactory.create(
75
+ model_platform=actor_config['model_platform'],
76
+ model_type=actor_config['model_type'],
77
+ model_config_dict=actor_config['model_config'],
78
+ )
79
+
80
+ actor_sys_msg = 'You are the author of the paper, and you will create a poster for the paper.'
81
+
82
+ actor_agent = ChatAgent(
83
+ system_message=actor_sys_msg,
84
+ model=actor_model,
85
+ message_window_size=10,
86
+ token_limit=actor_config.get('token_limit', None)
87
+ )
88
+
89
+ while True:
90
+ prompt = template.render(
91
+ markdown_document=text_content,
92
+ )
93
+ actor_agent.reset()
94
+ response = actor_agent.step(prompt)
95
+ input_token, output_token = account_token(response)
96
+
97
+ content_json = get_json_from_response(response.msgs[0].content)
98
+
99
+ if len(content_json) > 0:
100
+ break
101
+ print('Error: Empty response, retrying...')
102
+ if args.model_name_t.startswith('vllm_qwen'):
103
+ text_content = text_content[:80000]
104
+
105
+ if len(content_json['sections']) > 9:
106
+ # First 2 sections + randomly select 5 sections + last 2 sections
107
+ selected_sections = content_json['sections'][:2] + random.sample(content_json['sections'][2:-2], 5) + content_json['sections'][-2:]
108
+ content_json['sections'] = selected_sections
109
+
110
+ has_title = False
111
+
112
+ for section in content_json['sections']:
113
+ if not isinstance(section, dict) or 'title' not in section or 'content' not in section:
114
+ print(f"Ouch! The response is invalid, the LLM is not following the format :(")
115
+ print('Trying again...')
116
+ raise ValueError('Invalid section format in LLM response')
117
+ if 'title' in section['title'].lower():
118
+ has_title = True
119
+
120
+ if not has_title:
121
+ print('Ouch! The response is invalid, the LLM is not following the format :(')
122
+ raise ValueError('LLM response does not contain a title section')
123
+
124
+ os.makedirs('contents', exist_ok=True)
125
+ json.dump(content_json, open(f'contents/<{args.model_name_t}_{args.model_name_v}>_{args.poster_name}_raw_content.json', 'w'), indent=4)
126
+ return input_token, output_token, raw_result
127
+
128
+
129
+ def gen_image_and_table(args, conv_res):
130
+ input_token, output_token = 0, 0
131
+ raw_source = args.poster_path
132
+
133
+ output_dir = Path(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}')
134
+
135
+ output_dir.mkdir(parents=True, exist_ok=True)
136
+ doc_filename = args.poster_name
137
+
138
+ # Save page images
139
+ for page_no, page in conv_res.document.pages.items():
140
+ page_no = page.page_no
141
+ page_image_filename = output_dir / f"{doc_filename}-{page_no}.png"
142
+ with page_image_filename.open("wb") as fp:
143
+ page.image.pil_image.save(fp, format="PNG")
144
+
145
+ # Save images of figures and tables
146
+ table_counter = 0
147
+ picture_counter = 0
148
+ for element, _level in conv_res.document.iterate_items():
149
+ if isinstance(element, TableItem):
150
+ table_counter += 1
151
+ element_image_filename = (
152
+ output_dir / f"{doc_filename}-table-{table_counter}.png"
153
+ )
154
+ with element_image_filename.open("wb") as fp:
155
+ element.get_image(conv_res.document).save(fp, "PNG")
156
+
157
+ if isinstance(element, PictureItem):
158
+ picture_counter += 1
159
+ element_image_filename = (
160
+ output_dir / f"{doc_filename}-picture-{picture_counter}.png"
161
+ )
162
+ with element_image_filename.open("wb") as fp:
163
+ element.get_image(conv_res.document).save(fp, "PNG")
164
+
165
+ # Save markdown with embedded pictures
166
+ md_filename = output_dir / f"{doc_filename}-with-images.md"
167
+ conv_res.document.save_as_markdown(md_filename, image_mode=ImageRefMode.EMBEDDED)
168
+
169
+ # Save markdown with externally referenced pictures
170
+ md_filename = output_dir / f"{doc_filename}-with-image-refs.md"
171
+ conv_res.document.save_as_markdown(md_filename, image_mode=ImageRefMode.REFERENCED)
172
+
173
+ # Save HTML with externally referenced pictures
174
+ html_filename = output_dir / f"{doc_filename}-with-image-refs.html"
175
+ conv_res.document.save_as_html(html_filename, image_mode=ImageRefMode.REFERENCED)
176
+
177
+ tables = {}
178
+
179
+ table_index = 1
180
+ for table in conv_res.document.tables:
181
+ caption = table.caption_text(conv_res.document)
182
+ if len(caption) > 0:
183
+ table_img_path = f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}/{args.poster_name}-table-{table_index}.png'
184
+ table_img = PIL.Image.open(table_img_path)
185
+ tables[str(table_index)] = {
186
+ 'caption': caption,
187
+ 'table_path': table_img_path,
188
+ 'width': table_img.width,
189
+ 'height': table_img.height,
190
+ 'figure_size': table_img.width * table_img.height,
191
+ 'figure_aspect': table_img.width / table_img.height,
192
+ }
193
+
194
+ table_index += 1
195
+
196
+ images = {}
197
+ image_index = 1
198
+ for image in conv_res.document.pictures:
199
+ caption = image.caption_text(conv_res.document)
200
+ if len(caption) > 0:
201
+ image_img_path = f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}/{args.poster_name}-picture-{image_index}.png'
202
+ image_img = PIL.Image.open(image_img_path)
203
+ images[str(image_index)] = {
204
+ 'caption': caption,
205
+ 'image_path': image_img_path,
206
+ 'width': image_img.width,
207
+ 'height': image_img.height,
208
+ 'figure_size': image_img.width * image_img.height,
209
+ 'figure_aspect': image_img.width / image_img.height,
210
+ }
211
+ image_index += 1
212
+
213
+ json.dump(images, open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_images.json', 'w'), indent=4)
214
+ json.dump(tables, open(f'<{args.model_name_t}_{args.model_name_v}>_images_and_tables/{args.poster_name}_tables.json', 'w'), indent=4)
215
+
216
+ return input_token, output_token, images, tables
217
+
218
+ if __name__ == '__main__':
219
+ parser = argparse.ArgumentParser()
220
+ parser.add_argument('--poster_name', type=str, default=None)
221
+ parser.add_argument('--model_name', type=str, default='4o')
222
+ parser.add_argument('--poster_path', type=str, required=True)
223
+ parser.add_argument('--index', type=int, default=0)
224
+ args = parser.parse_args()
225
+
226
+ agent_config = get_agent_config(args.model_name)
+ # parse_raw() and gen_image_and_table() read the text/vision model names
+ # from args.model_name_t / args.model_name_v, so alias them here.
+ args.model_name_t = args.model_name
+ args.model_name_v = args.model_name
227
+
228
+ if args.poster_name is None:
229
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
230
+
231
+ # Parse raw content
232
+ input_token, output_token, conv_res = parse_raw(args, agent_config)
233
+
234
+ # Generate images and tables
235
+ _, _, images, tables = gen_image_and_table(args, conv_res)
236
+
237
+ print(f'Token consumption: {input_token} -> {output_token}')
Paper2Poster/PosterAgent/poster_gen_pipeline.py ADDED
@@ -0,0 +1,101 @@
1
+ import argparse
2
+ import os
+ import time
3
+
4
+ from utils.wei_utils import get_agent_config
5
+ from PosterAgent.parse_raw import parse_raw, gen_image_and_table
6
+ from PosterAgent.gen_outline_layout import filter_image_table, gen_outline_layout
7
+ from PosterAgent.gen_poster_content import gen_poster_content
8
+ from PosterAgent.fill_and_style import fill_poster_content, stylize_poster
9
+ from PosterAgent.deoverflow import deoverflow
10
+ from PosterAgent.apply_theme import poster_apply_theme
11
+
12
+ if __name__ == '__main__':
13
+ parser = argparse.ArgumentParser()
14
+ parser.add_argument('--poster_name', type=str, default=None)
15
+ parser.add_argument('--model_name', type=str, default='4o')
16
+ parser.add_argument('--poster_path', type=str, required=True)
17
+ parser.add_argument('--index', type=int, default=0)
18
+ parser.add_argument('--template_path', type=str, default=None)
19
+ parser.add_argument('--max_retry', type=int, default=3)
20
+ args = parser.parse_args()
+ # parse_raw() and gen_image_and_table() read the text/vision model names
+ # from args.model_name_t / args.model_name_v, so alias them here.
+ args.model_name_t = args.model_name
+ args.model_name_v = args.model_name
21
+
22
+ start_time = time.time()
23
+
24
+ actor_config = get_agent_config(args.model_name)
25
+ critic_config = get_agent_config(args.model_name)
26
+
27
+ if args.poster_name is None:
28
+ args.poster_name = args.poster_path.split('/')[-1].replace('.pdf', '').replace(' ', '_')
29
+
30
+ total_input_token, total_output_token = 0, 0
31
+
32
+ # Parse raw content
33
+ input_token, output_token, conv_res = parse_raw(args, actor_config)
34
+ total_input_token += input_token
35
+ total_output_token += output_token
36
+
37
+ # Generate images and tables
38
+ _, _, images, tables = gen_image_and_table(args, conv_res)
39
+
40
+ print()
41
+ print(f'Parsing token consumption: {input_token} -> {output_token}')
42
+
43
+ input_token, output_token = filter_image_table(args, actor_config)
44
+ total_input_token += input_token
45
+ total_output_token += output_token
46
+ print()
47
+ print(f'Filter images and tables token consumption: {input_token} -> {output_token}')
48
+
49
+ input_token, output_token = gen_outline_layout(args, actor_config, critic_config)
50
+ total_input_token += input_token
51
+ total_output_token += output_token
52
+ print()
53
+ print(f'Generate outline and layout token consumption: {input_token} -> {output_token}')
54
+
55
+ input_token, output_token = gen_poster_content(args, actor_config)
56
+ total_input_token += input_token
57
+ total_output_token += output_token
58
+ print()
59
+ print(f'Generate poster content token consumption: {input_token} -> {output_token}')
60
+
61
+ input_token, output_token = fill_poster_content(args, actor_config)
62
+ total_input_token += input_token
63
+ total_output_token += output_token
64
+ print()
65
+ print(f'Fill poster content token consumption: {input_token} -> {output_token}')
66
+
67
+ input_token, output_token = stylize_poster(args, actor_config)
68
+ total_input_token += input_token
69
+ total_output_token += output_token
70
+ print()
71
+ print(f'Stylize poster token consumption: {input_token} -> {output_token}')
72
+
73
+ input_token, output_token = deoverflow(args, actor_config, critic_config)
74
+ total_input_token += input_token
75
+ total_output_token += output_token
76
+ print()
77
+ print(f'Deoverflow token consumption: {input_token} -> {output_token}')
78
+
79
+ if args.template_path is not None:
80
+ input_token, output_token = poster_apply_theme(args, actor_config, critic_config)
81
+ total_input_token += input_token
82
+ total_output_token += output_token
83
+ print()
84
+ print(f'Apply theme token consumption: {input_token} -> {output_token}')
85
+
86
+ print()
87
+ print(f'Total token consumption: {total_input_token} -> {total_output_token}')
88
+
89
+ end_time = time.time()
90
+ elapsed_time = end_time - start_time
91
+ # Convert to hh:mm:ss format
92
+ hours, rem = divmod(elapsed_time, 3600)
93
+ minutes, seconds = divmod(rem, 60)
94
+
95
+ print(f"Execution Time: {int(hours):02}:{int(minutes):02}:{int(seconds):02}")
96
+
97
+ os.makedirs('log', exist_ok=True)
+ log_path = f'log/{args.model_name}_{args.poster_name}_{args.index}_log.txt'
98
+ with open(log_path, 'w') as f:
99
+ f.write(f'Total token consumption: {total_input_token} -> {total_output_token}\n')
100
+ f.write(f'Execution Time: {int(hours):02}:{int(minutes):02}:{int(seconds):02}\n')
101
+
Paper2Poster/PosterAgent/tree_split_layout.py ADDED
@@ -0,0 +1,750 @@
1
+ from lxml import etree
2
+ import os
3
+ import copy
4
+ import glob
5
+ import numpy as np
6
+ from sklearn.linear_model import LinearRegression, LogisticRegression
7
+ import matplotlib.pyplot as plt
8
+ import matplotlib.patches as patches
9
+
10
+ def parse_xml_with_recovery(xml_file_path):
11
+ parser = etree.XMLParser(recover=True)
12
+ tree = etree.parse(xml_file_path, parser)
13
+ return tree.getroot()
14
+
15
+ def parse_poster_xml(xml_file):
16
+ """
17
+ Parse an XML describing a single poster layout, e.g.:
18
+
19
+ <Poster Width="685" Height="968">
20
+ <Panel left="5" right="160" width="674" height="123">
21
+ <Text>Introduction</Text>
22
+ <Figure left="567" right="178" width="81" height="99" no="1" ... />
23
+ </Panel>
24
+ ...
25
+ </Poster>
26
+
27
+ Returns a dict with:
28
+ {
29
+ 'poster_width': float,
30
+ 'poster_height': float,
31
+ 'panels': [
32
+ {
33
+ 'x': float,
34
+ 'y': float,
35
+ 'width': float,
36
+ 'height': float,
37
+ 'text_blocks': [string, string, ...],
38
+ 'figure_blocks': [(fx, fy, fw, fh), ...]
39
+ },
40
+ ...
41
+ ]
42
+ }
43
+ """
44
+ root = parse_xml_with_recovery(xml_file)
45
+
46
+ # Poster dimensions
47
+ poster_w = float(root.get("Width", "1"))
48
+ poster_h = float(root.get("Height", "1"))
49
+
50
+ panels_data = []
51
+
52
+ # Iterate <Panel> elements
53
+ for panel_node in root.findall("Panel"):
54
+ x = float(panel_node.get("left", "0"))
55
+ y = float(panel_node.get("right", "0"))
56
+ w = float(panel_node.get("width", "0"))
57
+ h = float(panel_node.get("height", "0"))
58
+
59
+ # Gather text blocks
60
+ text_blocks = []
61
+ for text_node in panel_node.findall("Text"):
62
+ txt = text_node.text or ""
63
+ txt = txt.strip()
64
+ if txt:
65
+ text_blocks.append(txt)
66
+
67
+ # Gather figure blocks
68
+ figure_blocks = []
69
+ for fig_node in panel_node.findall("Figure"):
70
+ fx = float(fig_node.get("left", "0"))
71
+ fy = float(fig_node.get("right", "0"))
72
+ fw = float(fig_node.get("width", "0"))
73
+ fh = float(fig_node.get("height", "0"))
74
+ figure_blocks.append((fx, fy, fw, fh))
75
+
76
+ panel_info = {
77
+ "x": x,
78
+ "y": y,
79
+ "width": w,
80
+ "height": h,
81
+ "text_blocks": text_blocks,
82
+ "figure_blocks": figure_blocks
83
+ }
84
+ panels_data.append(panel_info)
85
+
86
+ return {
87
+ "poster_width": poster_w,
88
+ "poster_height": poster_h,
89
+ "panels": panels_data
90
+ }
91
+
92
+ def compute_panel_attributes(poster_data):
93
+ """
94
+ Given poster_data, compute:
95
+ - tp: ratio of text length for each panel
96
+ - gp: ratio of figure area for each panel
97
+ - sp: ratio of panel area to total poster area
98
+ - rp: aspect ratio (width / height)
99
+
100
+ Returns a list of dicts, each:
101
+ {
102
+ 'tp': float,
103
+ 'gp': float,
104
+ 'sp': float,
105
+ 'rp': float
106
+ }
107
+ """
108
+
109
+ poster_w = poster_data["poster_width"]
110
+ poster_h = poster_data["poster_height"]
111
+ panels = poster_data["panels"]
112
+
113
+ poster_area = max(poster_w * poster_h, 1.0) # avoid zero
114
+
115
+ # 1) Compute total text length across all panels
116
+ # 2) Compute total figure area across all panels
117
+ total_text_length = 0
118
+ total_figure_area = 0
119
+
120
+ # We'll store partial info about each panel so we don't parse multiple times
121
+ panel_list = []
122
+ for p in panels:
123
+ # Combine all text
124
+ panel_text_joined = " ".join(p["text_blocks"])
125
+ panel_text_len = len(panel_text_joined)
126
+
127
+ # Sum area of figure blocks
128
+ panel_fig_area = 0.0
129
+ for (fx, fy, fw, fh) in p["figure_blocks"]:
130
+ panel_fig_area += (fw * fh)
131
+
132
+ panel_list.append({
133
+ "x": p["x"],
134
+ "y": p["y"],
135
+ "width": p["width"],
136
+ "height": p["height"],
137
+ "text_len": panel_text_len,
138
+ "fig_area": panel_fig_area
139
+ })
140
+
141
+ total_text_length += panel_text_len
142
+ total_figure_area += panel_fig_area
143
+
144
+ # Avoid divide by zero
145
+ if total_text_length < 1:
146
+ total_text_length = 1
147
+ if total_figure_area < 1e-9:
148
+ total_figure_area = 1e-9
149
+
150
+ # 3) Compute attributes
151
+ results = []
152
+ for pinfo in panel_list:
153
+ pw = pinfo["width"]
154
+ ph = pinfo["height"]
155
+
156
+ panel_area = pw * ph
157
+ sp = panel_area / poster_area # fraction of total area
158
+ rp = (pw / ph) if ph > 0 else 1.0
159
+
160
+ tp = pinfo["text_len"] / float(total_text_length)
161
+ gp = pinfo["fig_area"] / float(total_figure_area)
162
+
163
+ results.append({
164
+ "tp": tp,
165
+ "gp": gp,
166
+ "sp": sp,
167
+ "rp": rp
168
+ })
169
+
170
+ return results
171
+
172
+ def train_panel_attribute_inference(panel_records):
173
+ """
174
+ The training data `panel_records` is a list of dicts, each containing:
175
+ {
176
+ 'tp': float,
177
+ 'gp': float,
178
+ 'sp': float, # (label for the sp regression)
179
+ 'rp': float # (label for the rp regression)
180
+ }
181
+
182
+ We'll train two linear regressors:
183
+ sp = w_s * [tp, gp, 1]
184
+ rp = w_r * [tp, gp, 1]
185
+
186
+ Returns dict with learned parameters:
187
+ {
188
+ 'w_s': array, # shape (3,) => sp = w_s[0]*tp + w_s[1]*gp + w_s[2]
189
+ 'sigma_s': float, # variance of residual for sp
190
+ 'w_r': array,
191
+ 'sigma_r': float
192
+ }
193
+ """
194
+ # Build data arrays
195
+ X_list = []
196
+ sp_list = []
197
+ rp_list = []
198
+
199
+ for rec in panel_records:
200
+ tp = rec['tp']
201
+ gp = rec['gp']
202
+ sp = rec['sp']
203
+ rp = rec['rp']
204
+ # X = [tp, gp, 1]
205
+ X_list.append([tp, gp, 1.0])
206
+ sp_list.append(sp)
207
+ rp_list.append(rp)
208
+
209
+ X_array = np.array(X_list, dtype=float)
210
+ y_sp = np.array(sp_list, dtype=float)
211
+ y_rp = np.array(rp_list, dtype=float)
212
+
213
+ # Fit linear regression for sp
214
+ linreg_sp = LinearRegression(fit_intercept=False)
215
+ linreg_sp.fit(X_array, y_sp)
216
+ w_s = linreg_sp.coef_
217
+ pred_sp = linreg_sp.predict(X_array)
218
+ residual_sp = y_sp - pred_sp
219
+ sigma_s = np.var(residual_sp, ddof=1)
220
+
221
+ # Fit linear regression for rp
222
+ linreg_rp = LinearRegression(fit_intercept=False)
223
+ linreg_rp.fit(X_array, y_rp)
224
+ w_r = linreg_rp.coef_
225
+ pred_rp = linreg_rp.predict(X_array)
226
+ residual_rp = y_rp - pred_rp
227
+ sigma_r = np.var(residual_rp, ddof=1)
228
+
229
+ model_params = {
230
+ "w_s": w_s,
231
+ "sigma_s": sigma_s,
232
+ "w_r": w_r,
233
+ "sigma_r": sigma_r
234
+ }
235
+ return model_params
236
+
237
+
238
+ def parse_poster_xml_for_figures(xml_path):
239
+ root = parse_xml_with_recovery(xml_path)
240
+
241
+ poster_w = float(root.get("Width", "1"))
242
+ poster_h = float(root.get("Height", "1"))
243
+ poster_area = poster_w * poster_h
244
+
245
+ records = []
246
+
247
+ for panel in root.findall("Panel"):
248
+ px, py = float(panel.get("left", 0)), float(panel.get("right", 0))
249
+ pw, ph = float(panel.get("width", 1)), float(panel.get("height", 1))
250
+ panel_area = pw * ph
251
+ sp = panel_area / poster_area
252
+ rp = pw / ph if ph > 0 else 1.0
253
+
254
+ lp = sum(len(t.text.strip()) for t in panel.findall("Text") if t.text)
255
+
256
+ for fig in panel.findall("Figure"):
257
+ fx, fy = float(fig.get("left", 0)), float(fig.get("right", 0))
258
+ fw, fh = float(fig.get("width", 1)), float(fig.get("height", 1))
259
+
260
+ sg = (fw * fh) / poster_area
261
+ rg = fw / fh if fh > 0 else 1.0
262
+ ug = fw / pw if pw > 0 else 0.1
263
+
264
+ panel_center_x = px + pw / 2
265
+ fig_center_x = fx + fw / 2
266
+ delta_x = fig_center_x - panel_center_x
267
+
268
+ hg = 0 if delta_x < -pw / 6 else (2 if delta_x > pw / 6 else 1)
269
+
270
+ record = {"sp": sp, "rp": rp, "lp": lp, "sg": sg, "rg": rg, "hg": hg, "ug": ug}
271
+ records.append(record)
272
+
273
+ return records
274
+
275
+
276
+ def train_figure_model(figure_records):
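+ # Fits a multinomial logistic regression for the figure's horizontal placement
+ # class hg (left / centre / right) and a linear regression, with residual
+ # variance, for ug, the figure's width as a fraction of its panel width; both
+ # use the feature vector [sp, lp, sg, 1].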
277
+ X_hg, y_hg, X_ug, y_ug = [], [], [], []
278
+ for r in figure_records:
279
+ feats = [r["sp"], r["lp"], r["sg"], 1.0]
280
+ X_hg.append(feats)
281
+ y_hg.append(r["hg"])
282
+ X_ug.append(feats)
283
+ y_ug.append(r["ug"])
284
+
285
+ clf_hg = LogisticRegression(multi_class="multinomial", solver="lbfgs", fit_intercept=False)
286
+ clf_hg.fit(X_hg, y_hg)
287
+
288
+ lin_ug = LinearRegression(fit_intercept=False)
289
+ lin_ug.fit(X_ug, y_ug)
290
+ residuals = y_ug - lin_ug.predict(X_ug)
291
+ sigma_u = np.var(residuals, ddof=1)
292
+
293
+ return {
294
+ "clf_hg": clf_hg,
295
+ "w_u": lin_ug.coef_,
296
+ "sigma_u": sigma_u
297
+ }
298
+
299
+
300
+ def main_train():
301
+ poster_dataset_path = 'assets/poster_data/Train'
302
+ # loop through all folders in the dataset
303
+ xml_files = []
304
+ for folder in os.listdir(poster_dataset_path):
305
+ folder_path = os.path.join(poster_dataset_path, folder)
306
+ if os.path.isdir(folder_path):
307
+ # find all XML files in this folder
308
+ xml_files.extend(glob.glob(os.path.join(folder_path, "*.txt")))
309
+
310
+ all_panel_records = []
311
+ for xml_file in xml_files:
312
+ poster_data = parse_poster_xml(xml_file)
313
+ # compute tp, gp, sp, rp
314
+ panel_attrs = compute_panel_attributes(poster_data)
315
+ # each panel_attrs entry is {tp, gp, sp, rp}
316
+ all_panel_records.extend(panel_attrs)
317
+
318
+ all_figure_records = []
319
+ for xml_path in xml_files:
320
+ recs = parse_poster_xml_for_figures(xml_path)
321
+ all_figure_records.extend(recs)
322
+
323
+ panel_model_params = train_panel_attribute_inference(all_panel_records)
324
+ figure_model_params = train_figure_model(all_figure_records)
325
+
326
+ return panel_model_params, figure_model_params
327
+
328
+ def place_text_and_figures_exact(panel_dict, figure_model_params, section_title_height=32):
329
+ """
330
+ Lay out text and figure boxes inside a panel.
331
+
332
+ The figure’s aspect ratio (width / height) is now enforced strictly:
333
+ • width ≤ panel width
334
+ • height ≤ 0.60 × panel height (empirical upper bound)
335
+ • width / height == panel_dict["figure_aspect"]
336
+ """
337
+ # ---------------- Constants used for text layout -----------------
338
+ char_width_px = 7
339
+ line_height_px = 16
340
+ chars_per_line = max(int(panel_dict["width"] / char_width_px), 1)
341
+
342
+ total_lines_text = np.ceil(panel_dict["text_len"] / chars_per_line)
343
+ total_text_height = total_lines_text * line_height_px
344
+
345
+ x_p, y_p = panel_dict["x"], panel_dict["y"]
346
+ w_p, h_p = panel_dict["width"], panel_dict["height"]
347
+
348
+ figure_boxes, text_boxes = [], []
349
+
350
+ panel_name_lower = panel_dict["panel_name"].lower()
351
+ has_title_in_name = "title" in panel_name_lower
352
+
353
+ # -------------------------------------------------------
354
+ # Helper to build a text‑box dict
355
+ # -------------------------------------------------------
356
+ def make_text_box(panel_id, x, y, width, height, textbox_id, textbox_name):
357
+ return {
358
+ "panel_id": panel_id,
359
+ "x": float(x),
360
+ "y": float(y),
361
+ "width": float(width),
362
+ "height": float(height),
363
+ "textbox_id": textbox_id,
364
+ "textbox_name": textbox_name,
365
+ }
366
+
367
+ # -----------------------------------------------------------------------
368
+ # Case 1 — no figure in this panel
369
+ # -----------------------------------------------------------------------
370
+ if panel_dict["figure_size"] <= 0:
371
+ if has_title_in_name:
372
+ text_boxes.append(
373
+ make_text_box(panel_dict["panel_id"], x_p, y_p, w_p, h_p,
374
+ textbox_id=0,
375
+ textbox_name=f'p<{panel_dict["panel_name"]}>_t0')
376
+ )
377
+ else:
378
+ title_h = min(section_title_height, h_p)
379
+ text_boxes.extend([
380
+ make_text_box(panel_dict["panel_id"], x_p, y_p, w_p, title_h,
381
+ textbox_id=0,
382
+ textbox_name=f'p<{panel_dict["panel_name"]}>_t0'),
383
+ make_text_box(panel_dict["panel_id"], x_p, y_p + title_h, w_p, h_p - title_h,
384
+ textbox_id=1,
385
+ textbox_name=f'p<{panel_dict["panel_name"]}>_t1'),
386
+ ])
387
+ return text_boxes, figure_boxes # early‑return (simpler branch)
388
+
389
+ # -----------------------------------------------------------------------
390
+ # Case 2 — there *is* a figure
391
+ # -----------------------------------------------------------------------
392
+ # 1. Sample horizontal‑alignment class (hg) and raw width fraction (ug)
393
+ feat = np.array([panel_dict["sp"],
394
+ panel_dict["text_len"],
395
+ panel_dict["figure_size"],
396
+ 1.0]).reshape(1, -1)
397
+
398
+ clf_hg = figure_model_params["clf_hg"]
399
+ hg_sample = int(np.argmax(clf_hg.predict_proba(feat)[0]))
400
+
401
+ mean_ug = float(np.dot(figure_model_params["w_u"], feat.flatten()))
402
+ sigma_u = float(np.sqrt(figure_model_params["sigma_u"]))
403
+ ug_sample = float(np.clip(np.random.normal(mean_ug, sigma_u), 0.10, 0.80)) # 10‑80 % of width
404
+
405
+ # 2. **Size the figure while *preserving* aspect ratio**
406
+ aspect = float(panel_dict["figure_aspect"]) # width / height
407
+ fig_w = ug_sample * w_p # preliminary width
408
+ fig_h = fig_w / aspect
409
+
410
+ max_fig_h = 0.60 * h_p # same limit you had
411
+ if fig_h > max_fig_h: # too tall → scale down
412
+ scale = max_fig_h / fig_h
413
+ fig_w *= scale
414
+ fig_h = max_fig_h # (ratio still intact)
415
+
416
+ # 3. Horizontal placement
417
+ if hg_sample == 0: # left
418
+ fig_x = x_p
419
+ elif hg_sample == 2: # right
420
+ fig_x = x_p + w_p - fig_w
421
+ else: # center
422
+ fig_x = x_p + 0.5 * (w_p - fig_w)
423
+ # Vertical centering
424
+ fig_y = y_p + 0.5 * (h_p - fig_h)
425
+
426
+ # 4. Split text into “top” and “bottom” areas around the figure
427
+ top_text_h = (fig_y - y_p)
428
+ bottom_text_h = (y_p + h_p) - (fig_y + fig_h)
429
+
430
+ # --- build top‑text boxes
431
+ if has_title_in_name:
432
+ text_boxes.append(
433
+ make_text_box(panel_dict["panel_id"], x_p, y_p, w_p, top_text_h,
434
+ textbox_id=0,
435
+ textbox_name=f'p<{panel_dict["panel_name"]}>_t0')
436
+ )
437
+ next_id = 1
438
+ else:
439
+ title_h = min(section_title_height, top_text_h)
440
+ text_boxes.extend([
441
+ make_text_box(panel_dict["panel_id"], x_p, y_p, w_p, title_h,
442
+ textbox_id=0,
443
+ textbox_name=f'p<{panel_dict["panel_name"]}>_t0'),
444
+ make_text_box(panel_dict["panel_id"], x_p, y_p + title_h, w_p, top_text_h - title_h,
445
+ textbox_id=1,
446
+ textbox_name=f'p<{panel_dict["panel_name"]}>_t1'),
447
+ ])
448
+ next_id = 2
449
+
450
+ # --- bottom text box
451
+ text_boxes.append(
452
+ make_text_box(panel_dict["panel_id"], x_p, fig_y + fig_h, w_p, bottom_text_h,
453
+ textbox_id=next_id,
454
+ textbox_name=f'p<{panel_dict["panel_name"]}>_t{next_id}')
455
+ )
456
+
457
+ # 5. Figure box
458
+ figure_boxes.append({
459
+ "panel_id": panel_dict["panel_id"],
460
+ "x": float(fig_x),
461
+ "y": float(fig_y),
462
+ "width": float(fig_w),
463
+ "height": float(fig_h),
464
+ "figure_id": 0,
465
+ "figure_name": f'p<{panel_dict["panel_name"]}>_f0',
466
+ })
467
+
468
+ return text_boxes, figure_boxes
469
+
470
+
471
+ def to_inches(value_in_units, units_per_inch=72):
472
+ """
473
+ Convert a single coordinate or dimension from 'units' to inches.
474
+ For example, if your units are 'points' (72 points = 1 inch),
475
+ then units_per_inch=72.
476
+ If your units are 'pixels' at 96 DPI, then units_per_inch=96.
477
+ """
478
+ return value_in_units / units_per_inch
479
+
480
+
481
+ def from_inches(value_in_inches, units_per_inch=72):
482
+ """
483
+ Convert from inches back to the original 'units'.
484
+ """
485
+ return value_in_inches * units_per_inch
486
+
487
+
488
+ def softmax(logits):
489
+ s = sum(np.exp(logits))
490
+ return [np.exp(l)/s for l in logits]
491
+
492
+
493
+ def infer_panel_attrs(panel_model, tp, gp):
494
+ # sp = w_s dot [tp, gp, 1]
495
+ # rp = w_r dot [tp, gp, 1]
496
+ vec = np.array([tp, gp, 1.0])
497
+ w_s = panel_model["w_s"]
498
+ w_r = panel_model["w_r"]
499
+ sp = np.dot(w_s, vec)
500
+ rp = np.dot(w_r, vec)
501
+ # clamp
502
+ sp = max(sp, 0.01)
503
+ rp = max(rp, 0.05)
504
+ return sp, rp
505
+
506
+
507
+ def panel_layout_generation(panels, x, y, w, h):
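+ # Recursively split the rectangle (x, y, w, h) among `panels`: every split
+ # point of the ordered panel list is tried, cutting the space either
+ # horizontally or vertically in proportion to the summed target area
+ # fractions `sp`, and the arrangement minimising the accumulated
+ # |rp - actual aspect ratio| loss is kept.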
508
+ # If only 1 panel, place it entirely
509
+ if len(panels) == 1:
510
+ p = panels[0]
511
+ cur_rp = (w/h) if h>1e-9 else p["rp"]
512
+ loss = abs(p["rp"] - cur_rp)
513
+ arrangement = [{
514
+ "panel_name": p["section_name"],
515
+ "panel_id": p["panel_id"],
516
+ "x": x, "y": y,
517
+ "width": w, "height": h
518
+ }]
519
+ return loss, arrangement
520
+
521
+ best_loss = float('inf')
522
+ best_arr = []
523
+ total_sp = sum(pp["sp"] for pp in panels)
524
+ n = len(panels)
525
+
526
+ for i in range(1, n):
527
+ subset1 = panels[:i]
528
+ subset2 = panels[i:]
529
+ sp1 = sum(pp["sp"] for pp in subset1)
530
+ ratio = sp1 / total_sp
531
+
532
+ # horizontal
533
+ h_top = ratio * h
534
+ if 0 < h_top < h:
535
+ l1, a1 = panel_layout_generation(subset1, x, y, w, h_top)
536
+ l2, a2 = panel_layout_generation(subset2, x, y + h_top, w, h - h_top)
537
+ if (l1 + l2) < best_loss:
538
+ best_loss = l1 + l2
539
+ best_arr = a1 + a2
540
+
541
+ # vertical
542
+ w_left = ratio * w
543
+ if 0 < w_left < w:
544
+ l1, a1 = panel_layout_generation(subset1, x, y, w_left, h)
545
+ l2, a2 = panel_layout_generation(subset2, x + w_left, y, w - w_left, h)
546
+ if (l1 + l2) < best_loss:
547
+ best_loss = l1 + l2
548
+ best_arr = a1 + a2
549
+
550
+ return best_loss, best_arr
551
+
552
+ def split_textbox(textbox, ratio):
553
+ """
554
+ Splits a textbox dictionary horizontally into two parts.
555
+
556
+ Parameters:
557
+ textbox (dict): A dictionary with the keys
558
+ 'panel_id', 'x', 'y', 'width', 'height', 'textbox_id', 'textbox_name'
559
+ ratio (float or int): Ratio of top height to bottom height.
560
+ For example, if ratio is 3, then:
561
+ top_height = (3/4) * height
562
+ bottom_height = (1/4) * height
563
+
564
+ Returns:
565
+ tuple: Two dictionaries corresponding to the top and bottom split textboxes.
566
+ """
567
+ # Calculate the new heights
568
+ total_ratio = ratio + 1 # because the ratio represents top:bottom as (ratio):(1)
569
+ top_height = textbox['height'] * ratio / total_ratio
570
+ bottom_height = textbox['height'] * 1 / total_ratio
571
+
572
+ # Derive the base textbox name by splitting off the existing _t suffix if present.
573
+ # This assumes the original textbox_name ends with "_t<number>".
574
+ base_name = textbox['textbox_name'].rsplit('_t', 1)[0]
575
+
576
+ # Create the top textbox dictionary
577
+ top_box = dict(textbox) # make a shallow copy
578
+ top_box['height'] = top_height
579
+ # y remains the same for the top textbox
580
+ top_box['textbox_name'] = f"{base_name}_t0" # rename with _t0
581
+
582
+ # Create the bottom textbox dictionary
583
+ bottom_box = dict(textbox) # make a shallow copy
584
+ bottom_box['y'] = textbox['y'] + top_height # adjust the y position
585
+ bottom_box['height'] = bottom_height
586
+ bottom_box['textbox_name'] = f"{base_name}_t1" # rename with _t1
587
+
588
+ return top_box, bottom_box
589
+
590
+ def generate_constrained_layout(paper_panels, poster_w, poster_h, title_height_ratio=0.1):
591
+ # Find title panel explicitly
592
+ try:
593
+ title_panel = next(p for p in paper_panels if ('title' in p["section_name"].lower()))
594
+ other_panels = [p for p in paper_panels if ('title' not in p["section_name"].lower())]
595
+ except StopIteration:
596
+ print('Oops, no title found, please try again.')
597
+ raise
598
+
599
+ title_h = poster_h * title_height_ratio
600
+ title_layout = {
601
+ "panel_name": title_panel["section_name"],
602
+ "panel_id": title_panel["panel_id"],
603
+ "x": 0, "y": 0,
604
+ "width": poster_w, "height": title_h
605
+ }
606
+
607
+ # Generate recursive layout on remaining space for other panels
608
+ layout_loss, remaining_layout = panel_layout_generation(
609
+ other_panels,
610
+ x=0, y=title_h,
611
+ w=poster_w, h=poster_h - title_h
612
+ )
613
+
614
+ # Combine title panel with others
615
+ complete_layout = [title_layout] + remaining_layout
616
+ return layout_loss, complete_layout
617
+
618
+
619
+ def main_inference(
620
+ paper_panels,
621
+ panel_model_params,
622
+ figure_model_params,
623
+ poster_width=1200,
624
+ poster_height=800,
625
+ shrink_margin=0
626
+ ):
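+ # End-to-end layout inference: infer each panel's size/aspect attributes from
+ # its text and figure shares, arrange the panels recursively on the poster
+ # canvas, then place text boxes and figures inside every panel.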
627
+ for p in paper_panels:
628
+ sp, rp = infer_panel_attrs(panel_model_params, p["tp"], p["gp"])
629
+ p["sp"] = sp
630
+ p["rp"] = rp
631
+
632
+ layout_loss, panel_arrangement = generate_constrained_layout(paper_panels, poster_width, poster_height, title_height_ratio=0.1)
633
+ print("Panel layout cost:", layout_loss)
634
+ for p in panel_arrangement:
635
+ print("Panel:", p)
636
+
637
+ panel_map = {}
638
+ for p in paper_panels:
639
+ panel_map[p["panel_id"]] = p
640
+
641
+ final_panels = []
642
+ for pa in panel_arrangement:
643
+ # Merge bounding box with the original sp,rp data
644
+ pid = pa["panel_id"]
645
+ merged_panel = {
646
+ "panel_id": pid,
647
+ "panel_name": pa['panel_name'],
648
+ "x": pa["x"] + shrink_margin,
649
+ "y": pa["y"] + shrink_margin,
650
+ "width": pa["width"] - 2 * shrink_margin,
651
+ "height": pa["height"] - 2 * shrink_margin,
652
+ "sp": panel_map[pid]["sp"],
653
+ "rp": panel_map[pid]["rp"],
654
+ "text_len": panel_map[pid]["text_len"],
655
+ "figure_size": panel_map[pid]["figure_size"],
656
+ "figure_aspect": panel_map[pid]["figure_aspect"]
657
+ }
658
+ final_panels.append(merged_panel)
659
+
660
+ text_arrangement = []
661
+ figure_arrangement = []
662
+
663
+ for p in final_panels:
664
+ text_boxes, fig_boxes = place_text_and_figures_exact(p, figure_model_params)
665
+ text_arrangement.extend(text_boxes) # text arrangement
666
+ figure_arrangement.extend(fig_boxes) # figure arrangement
667
+
668
+ return panel_arrangement, figure_arrangement, text_arrangement
669
+
670
+ def visualize_complete_layout(
671
+ panels, text_boxes, figure_boxes, poster_width, poster_height
672
+ ):
673
+ fig, ax = plt.subplots(figsize=(12,8))
674
+ ax.set_xlim(0, poster_width)
675
+ ax.set_ylim(0, poster_height)
676
+ ax.set_aspect('equal')
677
+
678
+ # Draw panels
679
+ for panel in panels:
680
+ rect = patches.Rectangle(
681
+ (panel["x"], panel["y"]), panel["width"], panel["height"],
682
+ linewidth=1, edgecolor='black', facecolor='none'
683
+ )
684
+ ax.add_patch(rect)
685
+ ax.text(
686
+ panel["x"] + 5, panel["y"] + panel["height"] - 5,
687
+ f'Panel {panel["panel_id"]}', fontsize=8, va='top', color='black'
688
+ )
689
+
690
+ # Draw text boxes
691
+ for txt in text_boxes:
692
+ rect = patches.Rectangle(
693
+ (txt["x"], txt["y"]), txt["width"], txt["height"],
694
+ linewidth=1, edgecolor='green', linestyle='-.', facecolor='none'
695
+ )
696
+ ax.add_patch(rect)
697
+ ax.text(
698
+ txt["x"] + 2, txt["y"] + txt["height"] - 2,
699
+ f'Text {txt["panel_id"]}', fontsize=7, color='green', va='top'
700
+ )
701
+
702
+ # Draw figures
703
+ for fig_box in figure_boxes:
704
+ rect = patches.Rectangle(
705
+ (fig_box["x"], fig_box["y"]), fig_box["width"], fig_box["height"],
706
+ linewidth=1, edgecolor='blue', linestyle='--', facecolor='none'
707
+ )
708
+ ax.add_patch(rect)
709
+ ax.text(
710
+ fig_box["x"] + 2, fig_box["y"] + 2,
711
+ f'Fig {fig_box["panel_id"]}', fontsize=7, color='blue', va='bottom'
712
+ )
713
+
714
+ plt.gca().invert_yaxis() # optional: invert y-axis if needed
715
+ plt.show()
716
+
717
+
718
+ def get_arrangments_in_inches(
719
+ width,
720
+ height,
721
+ panel_arrangement,
722
+ figure_arrangement,
723
+ text_arrangement,
724
+ units_per_inch=72
725
+ ):
726
+
727
+ panel_arrangement_inches = copy.deepcopy(panel_arrangement)
728
+ figure_arrangement_inches = copy.deepcopy(figure_arrangement)
729
+ text_arrangement_inches = copy.deepcopy(text_arrangement)
730
+
731
+ for p in panel_arrangement_inches:
732
+ p["x"] = to_inches(p["x"], units_per_inch)
733
+ p["y"] = to_inches(p["y"], units_per_inch)
734
+ p["width"] = to_inches(p["width"], units_per_inch)
735
+ p["height"] = to_inches(p["height"], units_per_inch)
736
+
737
+ for f in figure_arrangement_inches:
738
+ f["x"] = to_inches(f["x"], units_per_inch)
739
+ f["y"] = to_inches(f["y"], units_per_inch)
740
+ f["width"] = to_inches(f["width"], units_per_inch)
741
+ f["height"] = to_inches(f["height"], units_per_inch)
742
+
743
+ for t in text_arrangement_inches:
744
+ t["x"] = to_inches(t["x"], units_per_inch)
745
+ t["y"] = to_inches(t["y"], units_per_inch)
746
+ t["width"] = to_inches(t["width"], units_per_inch)
747
+ t["height"] = to_inches(t["height"], units_per_inch)
748
+
749
+ width_inch, height_inch = to_inches(width, units_per_inch), to_inches(height, units_per_inch)
750
+ return width_inch, height_inch, panel_arrangement_inches, figure_arrangement_inches, text_arrangement_inches
Paper2Poster/README.md ADDED
@@ -0,0 +1,215 @@
1
+ # 🎓Paper2Poster: Multimodal Poster Automation from Scientific Papers
2
+ # Automatic Academic Poster Generation from Scientific Papers
3
+
4
+ <p align="center">
5
+ <a href="https://arxiv.org/abs/2505.21497" target="_blank"><img src="https://img.shields.io/badge/arXiv-2505.21497-red"></a>
6
+ <a href="https://paper2poster.github.io/" target="_blank"><img src="https://img.shields.io/badge/Project-Page-brightgreen"></a>
7
+ <a href="https://huggingface.co/datasets/Paper2Poster/Paper2Poster" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Dataset-orange"></a>
8
+ <a href="https://huggingface.co/papers/2505.21497" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Daily Papers-red"></a>
9
+ <a href="https://x.com/_akhaliq/status/1927721150584390129" target="_blank"><img alt="X (formerly Twitter) URL" src="https://img.shields.io/twitter/url?url=https%3A%2F%2Fx.com%2F_akhaliq%2Fstatus%2F1927721150584390129"></a>
10
+ </p>
11
+
12
+ We address **how to create a poster from a paper** and **how to evaluate a poster**.
13
+
14
+ ![Overview](./assets/overall.png)
15
+
16
+
17
+ ## 🔥 Update
18
+ - [x] [2025.10.13] Added automatic **logo support** for conferences and institutions, **YAML-based style customization**, and a new default theme.
19
+ - [x] [2025.9.18] Paper2Poster has been accepted to the **NeurIPS 2025 Datasets and Benchmarks Track**.
20
+ - [x] [2025.9.3] We now support generating per-section content in **parallel** for faster generation, by simply specifying `--max_workers`.
21
+ - [x] [2025.5.27] We release the [arXiv](https://arxiv.org/abs/2505.21497), [code](https://github.com/Paper2Poster/Paper2Poster) and [`dataset`](https://huggingface.co/datasets/Paper2Poster/Paper2Poster)
22
+
23
+ <!--## 📚 Introduction-->
24
+
25
+ **PosterAgent** is a top-down, visual-in-the-loop multi-agent system from `paper.pdf` to **editable** `poster.pptx`.
26
+
27
+ ![PosterAgent Overview](./assets/posteragent.png)
28
+
29
+ <!--A Top-down, visual-in-the-loop, efficient multi-agent pipeline, which includes (a) Parser distills the paper into a structured asset library; the (b) Planner aligns text–visual pairs into a binary‐tree layout that preserves reading order and spatial balance; and the (c) Painter-Commentor loop refines each panel by executing rendering code and using VLM feedback to eliminate overflow and ensure alignment.-->
30
+
31
+ <!--![Paper2Poster Overview](./assets/paperquiz.png)-->
32
+
33
+ <!--**Paper2Poster:** A benchmark for paper to poster generation, paired with human generated poster, with a comprehensive evaluation suite, including metrics like **Visual Quality**, **Textual Coherence**, **VLM-as-Judge** and **PaperQuiz**. Notably, PaperQuiz is a novel evaluation which assume A Good poster should convey core paper content visually.-->
34
+
35
+ ## 📋 Table of Contents
36
+
37
+ <!--- [📚 Introduction](#-introduction)-->
38
+ - [🛠️ Installation](#-installation)
39
+ - [🚀 Quick Start](#-quick-start)
40
+ - [🔮 Evaluation](#-evaluation)
41
+ ---
42
+
43
+ ## 🛠️ Installation
44
+ Our Paper2Poster supports both local deployment (via [vLLM](https://docs.vllm.ai/en/v0.6.6/getting_started/installation.html)) and API-based access (e.g., GPT-4o).
45
+
46
+ **Python Environment**
47
+ ```bash
48
+ pip install -r requirements.txt
49
+ ```
50
+
51
+ **Install Libreoffice**
52
+ ```bash
53
+ sudo apt install libreoffice
54
+ ```
55
+
56
+ Or, if you do **not** have sudo access, download the `soffice` executable directly from https://www.libreoffice.org/download/download-libreoffice/ and add its directory to your `$PATH`.
57
+
58
+ **Install poppler**
59
+ ```bash
60
+ conda install -c conda-forge poppler
61
+ ```
62
+
63
+ **API Key**
64
+
65
+ Create a `.env` file in the project root and add your OpenAI API key:
66
+
67
+ ```bash
68
+ OPENAI_API_KEY=<your_openai_api_key>
69
+ ```
70
+
71
+ **Optional: Google Search API (for logo search)**
72
+
73
+ To use Google Custom Search for more reliable logo search, add these to your `.env` file:
74
+
75
+ ```bash
76
+ GOOGLE_SEARCH_API_KEY=<your_google_search_api_key>
77
+ GOOGLE_SEARCH_ENGINE_ID=<your_search_engine_id>
78
+ ```
79
+
80
+ ---
81
+
82
+ ## 🚀 Quick Start
83
+ Create a folder named `{paper_name}` under `{dataset_dir}`, and place your paper inside it as a PDF file named `paper.pdf`.
84
+ ```
85
+ 📁 {dataset_dir}/
86
+ └── 📁 {paper_name}/
87
+ └── 📄 paper.pdf
88
+ ```
89
+ To use open-source models, first deploy them with [vLLM](https://docs.vllm.ai/en/v0.6.6/getting_started/installation.html), and make sure the port is correctly specified in the `get_agent_config()` function in [`utils/wei_utils.py`](utils/wei_utils.py).
90
+
91
+ - [High Performance] Generate a poster with `GPT-4o`:
92
+
93
+ ```bash
94
+ python -m PosterAgent.new_pipeline \
95
+ --poster_path="${dataset_dir}/${paper_name}/paper.pdf" \
96
+ --model_name_t="4o" \ # LLM
97
+ --model_name_v="4o" \ # VLM
98
+ --poster_width_inches=48 \
99
+ --poster_height_inches=36
100
+ ```
101
+
102
+ - [Economic] Generate a poster with `Qwen-2.5-7B-Instruct` and `GPT-4o`:
103
+
104
+ ```bash
105
+ python -m PosterAgent.new_pipeline \
106
+ --poster_path="${dataset_dir}/${paper_name}/paper.pdf" \
107
+ --model_name_t="vllm_qwen" \ # LLM
108
+ --model_name_v="4o" \ # VLM
109
+ --poster_width_inches=48 \
110
+ --poster_height_inches=36 \
111
+ --no_blank_detection # An option to disable blank detection
112
+ ```
113
+
114
+ - [Local] Generate a poster with `Qwen-2.5-7B-Instruct`:
115
+
116
+ ```bash
117
+ python -m PosterAgent.new_pipeline \
118
+ --poster_path="${dataset_dir}/${paper_name}/paper.pdf" \
119
+ --model_name_t="vllm_qwen" \ # LLM
120
+ --model_name_v="vllm_qwen_vl" \ # VLM
121
+ --poster_width_inches=48 \
122
+ --poster_height_inches=36
123
+ ```
124
+
125
+ PosterAgent **supports flexible combinations of LLMs and VLMs**; feel free to try other options, or customize your own settings in `get_agent_config()` in [`utils/wei_utils.py`](utils/wei_utils.py).
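+
+ The config entries returned by `get_agent_config()` are consumed elsewhere in the pipeline (for example in `PosterAgent/parse_raw.py`) through keys such as `model_platform`, `model_type`, `model_config`, an optional `url` for locally served models, and an optional `token_limit`. Below is a minimal sketch of what a custom entry could look like; the platform and model identifiers are illustrative placeholders, not the project's real values.
+
+ ```python
+ # Hypothetical custom entry, shaped after how the agent configs are consumed in
+ # PosterAgent/parse_raw.py (ModelFactory.create + ChatAgent). The platform and
+ # model identifiers below are placeholders, not the project's real ones.
+ my_local_llm_config = {
+     'model_platform': 'vllm',                  # assumed platform identifier
+     'model_type': 'Qwen/Qwen2.5-7B-Instruct',  # model served locally
+     'model_config': {'temperature': 0.2},      # passed as model_config_dict
+     'url': 'http://localhost:8000/v1',         # endpoint of the local server
+     'token_limit': 32768,                      # optional; read via .get(...)
+ }
+ ```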
126
+
127
+ ### Adding Logos to Posters
128
+
129
+ You can automatically add institutional and conference logos to your posters:
130
+
131
+ ```bash
132
+ python -m PosterAgent.new_pipeline \
133
+ --poster_path="${dataset_dir}/${paper_name}/paper.pdf" \
134
+ --model_name_t="4o" \
135
+ --model_name_v="4o" \
136
+ --poster_width_inches=48 \
137
+ --poster_height_inches=36 \
138
+ --conference_venue="NeurIPS" # Automatically searches for conference logo
139
+ ```
140
+
141
+ **Logo Search Strategy:**
142
+ 1. **Local search**: First checks the provided logo store (`logo_store/institutes/` and `logo_store/conferences/`)
143
+ 2. **Web search**: If not found locally, performs online search
144
+ - By default, uses DuckDuckGo (no API key required)
145
+ - For more reliable results, use `--use_google_search` (requires `GOOGLE_SEARCH_API_KEY` and `GOOGLE_SEARCH_ENGINE_ID` in `.env`)
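+
+ A small sketch of this resolution order is shown below; only the `logo_store/` layout comes from this README, while the helper names and the web-search step are hypothetical placeholders rather than the project's actual API.
+
+ ```python
+ # Sketch of the logo resolution order described above. Only the logo_store/
+ # directory layout is taken from this README; search_logo_online is a
+ # hypothetical placeholder for the DuckDuckGo / Google web-search fallback.
+ from pathlib import Path
+ from typing import Optional
+
+ def search_logo_online(name: str) -> Optional[str]:
+     # Placeholder: the real pipeline downloads a logo image via web search.
+     return None
+
+ def resolve_logo(name: str, kind: str = 'conferences') -> Optional[str]:
+     # 1) Local search: check the bundled logo store first.
+     store = Path('logo_store') / kind
+     for ext in ('.png', '.svg', '.jpg'):
+         candidate = store / f'{name.lower()}{ext}'
+         if candidate.exists():
+             return str(candidate)
+     # 2) Web search fallback if no local logo was found.
+     return search_logo_online(name)
+ ```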
146
+
147
+ You can also specify custom logo paths to skip auto-detection:
148
+ ```bash
149
+ --institution_logo_path="path/to/institution_logo.png" \
150
+ --conference_logo_path="path/to/conference_logo.png"
151
+ ```
152
+
153
+ ### YAML Style Customization
154
+
155
+ Customize poster appearance via YAML configuration files:
156
+ - **Global defaults**: `config/poster.yaml` (applies to all posters)
157
+ - **Per-poster override**: Place `poster.yaml` next to your `paper.pdf` for custom styling
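+
+ A sketch of the implied precedence is shown below, assuming a simple shallow merge; the pipeline's actual merge behaviour may differ.
+
+ ```python
+ # Sketch of the YAML precedence described above: global defaults from
+ # config/poster.yaml, overridden by a poster.yaml placed next to paper.pdf.
+ # The shallow merge is an assumption, not the pipeline's actual code.
+ import os
+ import yaml
+
+ def load_poster_style(paper_pdf_path: str) -> dict:
+     with open('config/poster.yaml') as f:
+         style = yaml.safe_load(f) or {}
+     per_poster = os.path.join(os.path.dirname(paper_pdf_path), 'poster.yaml')
+     if os.path.exists(per_poster):
+         with open(per_poster) as f:
+             style.update(yaml.safe_load(f) or {})  # per-poster keys win
+     return style
+ ```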
158
+
159
+
160
+ ## 🔮 Evaluation
161
+ Download Paper2Poster evaluation dataset via:
162
+ ```bash
163
+ python -m PosterAgent.create_dataset
164
+ ```
165
+
166
+ In evaluation, papers are stored under a directory called `Paper2Poster-data`.
167
+
168
+ To evaluate a generated poster with **PaperQuiz**:
169
+ ```bash
170
+ python -m Paper2Poster-eval.eval_poster_pipeline \
171
+ --paper_name="${paper_name}" \
172
+ --poster_method="${model_t}_${model_v}_generated_posters" \
173
+ --metric=qa # PaperQuiz
174
+ ```
175
+
176
+ To evaluate a generated poster with **VLM-as-Judge**:
177
+ ```bash
178
+ python -m Paper2Poster-eval.eval_poster_pipeline \
179
+ --paper_name="${paper_name}" \
180
+ --poster_method="${model_t}_${model_v}_generated_posters" \
181
+ --metric=judge # VLM-as-Judge
182
+ ```
183
+
184
+ To evaluate a generated poster with other statistical metrics (such as visual similarity, PPL, etc.):
185
+ ```bash
186
+ python -m Paper2Poster-eval.eval_poster_pipeline \
187
+ --paper_name="${paper_name}" \
188
+ --poster_method="${model_t}_${model_v}_generated_posters" \
189
+ --metric=stats # statistical measures
190
+ ```
191
+
192
+ If you want to create a PaperQuiz for your own paper:
193
+ ```bash
194
+ python -m Paper2Poster-eval.create_paper_questions \
195
+ --paper_folder="Paper2Poster-data/${paper_name}"
196
+ ```
197
+
198
+ ## ❤ Acknowledgement
199
+ We extend our gratitude to [🐫CAMEL](https://github.com/camel-ai/camel), [🦉OWL](https://github.com/camel-ai/owl), [Docling](https://github.com/docling-project/docling), [PPTAgent](https://github.com/icip-cas/PPTAgent) for providing their codebases.
200
+
201
+ ## 📖 Citation
202
+
203
+ Please kindly cite our paper if you find this project helpful.
204
+
205
+ ```bibtex
206
+ @misc{paper2poster,
207
+ title={Paper2Poster: Towards Multimodal Poster Automation from Scientific Papers},
208
+ author={Wei Pang and Kevin Qinghong Lin and Xiangru Jian and Xi He and Philip Torr},
209
+ year={2025},
210
+ eprint={2505.21497},
211
+ archivePrefix={arXiv},
212
+ primaryClass={cs.CV},
213
+ url={https://arxiv.org/abs/2505.21497},
214
+ }
215
+ ```
Paper2Poster/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ import sys, os
2
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "camel"))
3
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "docling"))
Paper2Poster/camel/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from camel.logger import disable_logging, enable_logging, set_log_level
16
+
17
+ __version__ = '0.2.19'
18
+
19
+ __all__ = [
20
+ '__version__',
21
+ 'camel',
22
+ 'disable_logging',
23
+ 'enable_logging',
24
+ 'set_log_level',
25
+ ]
Paper2Poster/camel/agents/__init__.py ADDED
@@ -0,0 +1,44 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from .base import BaseAgent
15
+ from .chat_agent import ChatAgent
16
+ from .critic_agent import CriticAgent
17
+ from .embodied_agent import EmbodiedAgent
18
+ from .knowledge_graph_agent import KnowledgeGraphAgent
19
+ from .role_assignment_agent import RoleAssignmentAgent
20
+ from .search_agent import SearchAgent
21
+ from .task_agent import (
22
+ TaskCreationAgent,
23
+ TaskPlannerAgent,
24
+ TaskPrioritizationAgent,
25
+ TaskSpecifyAgent,
26
+ )
27
+ from .tool_agents.base import BaseToolAgent
28
+ from .tool_agents.hugging_face_tool_agent import HuggingFaceToolAgent
29
+
30
+ __all__ = [
31
+ 'BaseAgent',
32
+ 'ChatAgent',
33
+ 'TaskSpecifyAgent',
34
+ 'TaskPlannerAgent',
35
+ 'TaskCreationAgent',
36
+ 'TaskPrioritizationAgent',
37
+ 'CriticAgent',
38
+ 'BaseToolAgent',
39
+ 'HuggingFaceToolAgent',
40
+ 'EmbodiedAgent',
41
+ 'RoleAssignmentAgent',
42
+ 'SearchAgent',
43
+ 'KnowledgeGraphAgent',
44
+ ]
Paper2Poster/camel/agents/base.py ADDED
@@ -0,0 +1,29 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from abc import ABC, abstractmethod
15
+ from typing import Any
16
+
17
+
18
+ class BaseAgent(ABC):
19
+ r"""An abstract base class for all CAMEL agents."""
20
+
21
+ @abstractmethod
22
+ def reset(self, *args: Any, **kwargs: Any) -> Any:
23
+ r"""Resets the agent to its initial state."""
24
+ pass
25
+
26
+ @abstractmethod
27
+ def step(self, *args: Any, **kwargs: Any) -> Any:
28
+ r"""Performs a single step of the agent."""
29
+ pass
Paper2Poster/camel/agents/chat_agent.py ADDED
@@ -0,0 +1,1539 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ import re
19
+ import uuid
20
+ from collections import defaultdict
21
+ from typing import (
22
+ TYPE_CHECKING,
23
+ Any,
24
+ Callable,
25
+ Dict,
26
+ List,
27
+ Optional,
28
+ Tuple,
29
+ Type,
30
+ Union,
31
+ )
32
+
33
+ from openai.types.chat import ChatCompletionMessageToolCall
34
+ from openai.types.chat.chat_completion_message_tool_call import Function
35
+ from pydantic import BaseModel, ValidationError
36
+
37
+ from camel.agents.base import BaseAgent
38
+ from camel.memories import (
39
+ AgentMemory,
40
+ ChatHistoryMemory,
41
+ MemoryRecord,
42
+ ScoreBasedContextCreator,
43
+ )
44
+ from camel.messages import BaseMessage, FunctionCallingMessage, OpenAIMessage
45
+ from camel.models import (
46
+ BaseModelBackend,
47
+ ModelFactory,
48
+ ModelManager,
49
+ ModelProcessingError,
50
+ )
51
+ from camel.responses import ChatAgentResponse
52
+ from camel.types import (
53
+ ChatCompletion,
54
+ ChatCompletionChunk,
55
+ ModelPlatformType,
56
+ ModelType,
57
+ OpenAIBackendRole,
58
+ RoleType,
59
+ )
60
+ from camel.utils import (
61
+ func_string_to_callable,
62
+ generate_prompt_for_structured_output,
63
+ get_model_encoding,
64
+ get_pydantic_object_schema,
65
+ json_to_function_code,
66
+ )
67
+
68
+ if TYPE_CHECKING:
69
+ from openai import Stream
70
+
71
+ from camel.terminators import ResponseTerminator
72
+ from camel.toolkits import FunctionTool
73
+
74
+
75
+ logger = logging.getLogger(__name__)
76
+
77
+ # AgentOps decorator setting
78
+ try:
79
+ import os
80
+
81
+ if os.getenv("AGENTOPS_API_KEY") is not None:
82
+ from agentops import track_agent
83
+ else:
84
+ raise ImportError
85
+ except (ImportError, AttributeError):
86
+ from camel.utils import track_agent
87
+
88
+
89
+ class FunctionCallingRecord(BaseModel):
90
+ r"""Historical records of functions called in the conversation.
91
+
92
+ Attributes:
93
+ func_name (str): The name of the function being called.
94
+ args (Dict[str, Any]): The dictionary of arguments passed to
95
+ the function.
96
+ result (Any): The execution result of calling this function.
97
+ tool_call_id (str): The ID of the tool call, if available.
98
+ """
99
+
100
+ func_name: str
101
+ args: Dict[str, Any]
102
+ result: Any
103
+ tool_call_id: str
104
+
105
+ def __str__(self) -> str:
106
+ r"""Overridden version of the string function.
107
+
108
+ Returns:
109
+ str: Modified string to represent the function calling.
110
+ """
111
+ return (
112
+ f"Function Execution: {self.func_name}\n"
113
+ f"\tArgs: {self.args}\n"
114
+ f"\tResult: {self.result}\n"
115
+ )
116
+
117
+ def as_dict(self) -> dict[str, Any]:
118
+ r"""Returns the function calling record as a dictionary.
119
+
120
+ Returns:
121
+ dict[str, Any]: The function calling record as a dictionary.
122
+ """
123
+ return self.model_dump()
124
+
125
+
126
+ @track_agent(name="ChatAgent")
127
+ class ChatAgent(BaseAgent):
128
+ r"""Class for managing conversations of CAMEL Chat Agents.
129
+
130
+ Args:
131
+ system_message (Union[BaseMessage, str], optional): The system message
132
+ for the chat agent.
133
+ model (BaseModelBackend, optional): The model backend to use for
134
+ generating responses. (default: :obj:`ModelPlatformType.DEFAULT`
135
+ with `ModelType.DEFAULT`)
136
+ memory (AgentMemory, optional): The agent memory for managing chat
137
+ messages. If `None`, a :obj:`ChatHistoryMemory` will be used.
138
+ (default: :obj:`None`)
139
+ message_window_size (int, optional): The maximum number of previous
140
+ messages to include in the context window. If `None`, no windowing
141
+ is performed. (default: :obj:`None`)
142
+ token_limit (int, optional): The maximum number of tokens in a context.
143
+ The context will be automatically pruned to fulfill the limitation.
144
+ If `None`, it will be set according to the backend model.
145
+ (default: :obj:`None`)
146
+ output_language (str, optional): The language to be output by the
147
+ agent. (default: :obj:`None`)
148
+ tools (Optional[List[Union[FunctionTool, Callable]]], optional): List
149
+ of available :obj:`FunctionTool` or :obj:`Callable`. (default:
150
+ :obj:`None`)
151
+ external_tools (Optional[List[Union[FunctionTool, Callable]]],
152
+ optional): List of external tools (:obj:`FunctionTool` or
153
+ :obj:`Callable`) bound to one chat agent. When these tools are
154
+ called, the agent will directly return the request instead of
155
+ processing it. (default: :obj:`None`)
156
+ response_terminators (List[ResponseTerminator], optional): List of
157
+ :obj:`ResponseTerminator` bound to one chat agent.
158
+ (default: :obj:`None`)
159
+ scheduling_strategy (str): name of function that defines how to select
160
+ the next model in ModelManager. (default: :str:`round_robin`)
161
+ single_iteration (bool): Whether to let the agent perform only one
162
+ model calling at each step. (default: :obj:`False`)
163
+ """
164
+
165
+ def __init__(
166
+ self,
167
+ system_message: Optional[Union[BaseMessage, str]] = None,
168
+ model: Optional[
169
+ Union[BaseModelBackend, List[BaseModelBackend]]
170
+ ] = None,
171
+ memory: Optional[AgentMemory] = None,
172
+ message_window_size: Optional[int] = None,
173
+ token_limit: Optional[int] = None,
174
+ output_language: Optional[str] = None,
175
+ tools: Optional[List[Union[FunctionTool, Callable]]] = None,
176
+ external_tools: Optional[List[Union[FunctionTool, Callable]]] = None,
177
+ response_terminators: Optional[List[ResponseTerminator]] = None,
178
+ scheduling_strategy: str = "round_robin",
179
+ single_iteration: bool = False,
180
+ ) -> None:
181
+ # Initialize the system message, converting string to BaseMessage if needed
182
+ if isinstance(system_message, str):
183
+ system_message = BaseMessage.make_assistant_message(
184
+ role_name='Assistant', content=system_message
185
+ )
186
+
187
+ self.orig_sys_message: Optional[BaseMessage] = system_message
188
+ self._system_message: Optional[BaseMessage] = system_message
189
+ self.role_name: str = (
190
+ getattr(system_message, 'role_name', None) or "assistant"
191
+ )
192
+ self.role_type: RoleType = (
193
+ getattr(system_message, 'role_type', None) or RoleType.ASSISTANT
194
+ )
195
+ self.model_backend = ModelManager(
196
+ model
197
+ if model is not None
198
+ else ModelFactory.create(
199
+ model_platform=ModelPlatformType.DEFAULT,
200
+ model_type=ModelType.DEFAULT,
201
+ ),
202
+ scheduling_strategy=scheduling_strategy,
203
+ )
204
+ self.model_type = self.model_backend.model_type
205
+
206
+ # Initialize tools
207
+ self.tools: List[FunctionTool] = (
208
+ self._initialize_tools(tools) if tools else []
209
+ )
210
+ self.external_tools: List[FunctionTool] = (
211
+ self._initialize_tools(external_tools) if external_tools else []
212
+ )
213
+ self.external_tool_names: List[str] = [
214
+ tool.get_function_name() for tool in self.external_tools
215
+ ]
216
+ self.all_tools = self.tools + self.external_tools or []
217
+
218
+ # Create tool dictionaries and configure backend tools if necessary
219
+ self.tool_dict = {
220
+ tool.get_function_name(): tool for tool in self.all_tools
221
+ }
222
+
223
+ # If the user set tools from `ChatAgent`, it will override the
224
+ # configured tools in `BaseModelBackend`.
225
+ if self.all_tools:
226
+ logger.warning(
227
+ "Overriding the configured tools in `BaseModelBackend` with the tools from `ChatAgent`."
228
+ )
229
+ tool_schema_list = [
230
+ tool.get_openai_tool_schema() for tool in self.all_tools
231
+ ]
232
+ self.model_backend.model_config_dict['tools'] = tool_schema_list
233
+
234
+ self.model_token_limit = token_limit or self.model_backend.token_limit
235
+ context_creator = ScoreBasedContextCreator(
236
+ self.model_backend.token_counter,
237
+ self.model_token_limit,
238
+ )
239
+ self.memory: AgentMemory = memory or ChatHistoryMemory(
240
+ context_creator, window_size=message_window_size
241
+ )
242
+
243
+ self.output_language: Optional[str] = output_language
244
+ if self.output_language is not None:
245
+ self.set_output_language(self.output_language)
246
+
247
+ self.terminated: bool = False
248
+ self.response_terminators = response_terminators or []
249
+ self.init_messages()
250
+ self.tool_prompt_added = False
251
+ self.single_iteration = single_iteration
252
+
253
+ def _initialize_tools(
254
+ self, tools: List[Union[FunctionTool, Callable]]
255
+ ) -> List[FunctionTool]:
256
+ r"""Helper method to initialize tools as FunctionTool instances."""
257
+ from camel.toolkits import FunctionTool
258
+
259
+ func_tools = []
260
+ for tool in tools:
261
+ if not isinstance(tool, FunctionTool):
262
+ tool = FunctionTool(tool)
263
+ func_tools.append(tool)
264
+ return func_tools
265
+
266
+ def add_tool(
267
+ self, tool: Union[FunctionTool, Callable], is_external: bool = False
268
+ ) -> None:
269
+ r"""Add a tool to the agent, specifying if it's an external tool."""
270
+ # Initialize the tool
271
+ initialized_tool = self._initialize_tools([tool])
272
+
273
+ # Update tools or external tools based on is_external flag
274
+ if is_external:
275
+ self.external_tools = self.external_tools + initialized_tool
276
+ self.external_tool_names.extend(
277
+ tool.get_function_name() for tool in initialized_tool
278
+ )
279
+ else:
280
+ self.tools = self.tools + initialized_tool
281
+
282
+ # Rebuild all_tools, and tool_dict
283
+ self.all_tools = self.tools + self.external_tools
284
+ self.tool_dict = {
285
+ tool.get_function_name(): tool for tool in self.all_tools
286
+ }
287
+
288
+ tool_schema_list = [
289
+ tool.get_openai_tool_schema() for tool in self.all_tools
290
+ ]
291
+ self.model_backend.model_config_dict['tools'] = tool_schema_list
292
+
293
+ def remove_tool(self, tool_name: str, is_external: bool = False) -> bool:
294
+ r"""Remove a tool by name, specifying if it's an external tool."""
295
+ tool_list = self.external_tools if is_external else self.tools
296
+ if not tool_list:
297
+ return False
298
+
299
+ for tool in tool_list:
300
+ if tool.get_function_name() == tool_name:
301
+ tool_list.remove(tool)
302
+ if is_external:
303
+ self.external_tool_names.remove(tool_name)
304
+ # Reinitialize the tool dictionary
305
+ self.all_tools = (self.tools or []) + (
306
+ self.external_tools or []
307
+ )
308
+ self.tool_dict = {
309
+ tool.get_function_name(): tool for tool in self.all_tools
310
+ }
311
+ tool_schema_list = [
312
+ tool.get_openai_tool_schema() for tool in self.all_tools
313
+ ]
314
+ self.model_backend.model_config_dict['tools'] = (
315
+ tool_schema_list
316
+ )
317
+ return True
318
+ return False
319
+
320
+ def list_tools(self) -> dict:
321
+ r"""List all tools, separated into normal and external tools."""
322
+ normal_tools = [
323
+ tool.get_function_name() for tool in (self.tools or [])
324
+ ]
325
+ external_tools = [
326
+ tool.get_function_name() for tool in (self.external_tools or [])
327
+ ]
328
+
329
+ return {"normal_tools": normal_tools, "external_tools": external_tools}
330
+
331
+ # ruff: noqa: E501
332
+ def _generate_tool_prompt(self, tool_schema_list: List[Dict]) -> str:
333
+ r"""Generates a tool prompt based on the provided tool schema list.
334
+
335
+ Args:
336
+ tool_schema_list (List[Dict]): A list of dictionaries, each
337
+ containing a tool schema.
338
+
339
+ Returns:
340
+ str: A string representing the tool prompt.
341
+ """
342
+ tool_prompts = []
343
+
344
+ for tool in tool_schema_list:
345
+ tool_info = tool['function']
346
+ tool_name = tool_info['name']
347
+ tool_description = tool_info['description']
348
+ tool_json = json.dumps(tool_info, indent=4)
349
+
350
+ prompt = f"Use the function '{tool_name}' to '{tool_description}':\n{tool_json}\n"
351
+ tool_prompts.append(prompt)
352
+
353
+ tool_prompt_str = "\n".join(tool_prompts)
354
+
355
+ final_prompt = f"""
356
+ You have access to the following functions:
357
+
358
+ {tool_prompt_str}
359
+
360
+ If you choose to call a function ONLY reply in the following format with no
361
+ prefix or suffix:
362
+
363
+ <function=example_function_name>{{"example_name": "example_value"}}</function>
364
+
365
+ Reminder:
366
+ - Function calls MUST follow the specified format, start with <function= and end with </function>
367
+ - Required parameters MUST be specified
368
+ - Only call one function at a time
369
+ - Put the entire function call reply on one line
370
+ - If there is no function call available, answer the question like normal
371
+ with your current knowledge and do not tell the user about function calls
372
+ """
373
+ return final_prompt
374
+
375
+ def _parse_tool_response(self, response: str):
376
+ r"""Parses the tool response to extract the function name and
377
+ arguments.
378
+
379
+ Args:
380
+ response (str): The response from the model containing the
381
+ function call.
382
+
383
+ Returns:
384
+ Optional[Dict[str, Any]]: The parsed function name and arguments
385
+ if found, otherwise :obj:`None`.
386
+ """
387
+ function_regex = r"<function=(\w+)>(.*?)</function>"
388
+ match = re.search(function_regex, response)
389
+
390
+ if match:
391
+ function_name, args_string = match.groups()
392
+ try:
393
+ args = json.loads(args_string)
394
+ return {"function": function_name, "arguments": args}
395
+ except json.JSONDecodeError as error:
396
+ logger.error(f"Error parsing function arguments: {error}")
397
+ return None
398
+ return None
399
+
400
+ def reset(self):
401
+ r"""Resets the :obj:`ChatAgent` to its initial state."""
402
+ self.terminated = False
403
+ self.init_messages()
404
+ for terminator in self.response_terminators:
405
+ terminator.reset()
406
+
407
+ @property
408
+ def system_message(self) -> Optional[BaseMessage]:
409
+ r"""The getter method for the property :obj:`system_message`.
410
+
411
+ Returns:
412
+ Optional[BaseMessage]: The system message of this agent if set,
413
+ else :obj:`None`.
414
+ """
415
+ return self._system_message
416
+
417
+ @system_message.setter
418
+ def system_message(self, message: BaseMessage) -> None:
419
+ r"""The setter method for the property :obj:`system_message`.
420
+
421
+ Args:
422
+ message (BaseMessage): The message to be set as the
423
+ new system message of this agent.
424
+ """
425
+ self._system_message = message
426
+
427
+ def is_tools_added(self) -> bool:
428
+ r"""Whether tool calling is enabled for this agent.
429
+
430
+ Returns:
431
+ bool: Whether tool calling is enabled for this agent, determined
432
+ by whether the dictionary of tools is empty.
433
+ """
434
+ return len(self.tool_dict) > 0
435
+
436
+ def update_memory(
437
+ self, message: BaseMessage, role: OpenAIBackendRole
438
+ ) -> None:
439
+ r"""Updates the agent memory with a new message.
440
+
441
+ Args:
442
+ message (BaseMessage): The new message to add to the stored
443
+ messages.
444
+ role (OpenAIBackendRole): The backend role type.
445
+ """
446
+ self.memory.write_record(
447
+ MemoryRecord(message=message, role_at_backend=role)
448
+ )
449
+
450
+ def set_output_language(self, output_language: str) -> BaseMessage:
451
+ r"""Sets the output language for the system message. This method
452
+ updates the output language for the system message. The output
453
+ language determines the language in which the output text should be
454
+ generated.
455
+
456
+ Args:
457
+ output_language (str): The desired output language.
458
+
459
+ Returns:
460
+ BaseMessage: The updated system message object.
461
+ """
462
+ self.output_language = output_language
463
+ language_prompt = (
464
+ "\nRegardless of the input language, "
465
+ f"you must output text in {output_language}."
466
+ )
467
+ if self.orig_sys_message is not None:
468
+ content = self.orig_sys_message.content + language_prompt
469
+ self._system_message = self.orig_sys_message.create_new_instance(
470
+ content
471
+ )
472
+ else:
473
+ self._system_message = BaseMessage.make_assistant_message(
474
+ role_name="Assistant",
475
+ content=language_prompt,
476
+ )
477
+
478
+ system_record = MemoryRecord(
479
+ message=self._system_message,
480
+ role_at_backend=OpenAIBackendRole.SYSTEM,
481
+ )
482
+ self.memory.clear()
483
+ self.memory.write_record(system_record)
484
+ return self._system_message
485
+
486
+ def get_info(
487
+ self,
488
+ session_id: Optional[str],
489
+ usage: Optional[Dict[str, int]],
490
+ termination_reasons: List[str],
491
+ num_tokens: int,
492
+ tool_calls: List[FunctionCallingRecord],
493
+ external_tool_request: Optional[ChatCompletionMessageToolCall] = None,
494
+ ) -> Dict[str, Any]:
495
+ r"""Returns a dictionary containing information about the chat session.
496
+
497
+ Args:
498
+ session_id (str, optional): The ID of the chat session.
499
+ usage (Dict[str, int], optional): Information about the usage of
500
+ the LLM.
501
+ termination_reasons (List[str]): The reasons for the termination
502
+ of the chat session.
503
+ num_tokens (int): The number of tokens used in the chat session.
504
+ tool_calls (List[FunctionCallingRecord]): The list of function
505
+ calling records, containing the information of called tools.
506
+ external_tool_request
507
+ (Optional[ChatCompletionMessageToolCall], optional):
508
+ The tool calling request of external tools from the model.
509
+ These requests are directly returned to the user instead of
510
+ being processed by the agent automatically.
511
+ (default: :obj:`None`)
512
+
513
+ Returns:
514
+ Dict[str, Any]: The chat session information.
515
+ """
516
+ return {
517
+ "id": session_id,
518
+ "usage": usage,
519
+ "termination_reasons": termination_reasons,
520
+ "num_tokens": num_tokens,
521
+ "tool_calls": tool_calls,
522
+ "external_tool_request": external_tool_request,
523
+ }
524
+
525
+ def init_messages(self) -> None:
526
+ r"""Initializes the stored messages list with the current system
527
+ message.
528
+ """
529
+ if self._system_message is not None:
530
+ system_record = MemoryRecord(
531
+ message=self._system_message,
532
+ role_at_backend=OpenAIBackendRole.SYSTEM,
533
+ )
534
+ self.memory.clear()
535
+ self.memory.write_record(system_record)
536
+ else:
537
+ self.memory.clear()
538
+
539
+ def record_message(self, message: BaseMessage) -> None:
540
+ r"""Records the externally provided message into the agent memory as if
541
+ it were an answer of the :obj:`ChatAgent` from the backend. Currently,
542
+ the choice of the critic is submitted with this method.
543
+
544
+ Args:
545
+ message (BaseMessage): An external message to be recorded in the
546
+ memory.
547
+ """
548
+ self.update_memory(message, OpenAIBackendRole.ASSISTANT)
549
+
550
+ def step(
551
+ self,
552
+ input_message: Union[BaseMessage, str],
553
+ response_format: Optional[Type[BaseModel]] = None,
554
+ ) -> ChatAgentResponse:
555
+ r"""Executes a single step in the chat session, generating a response
556
+ to the input message.
557
+
558
+ Args:
559
+ input_message (Union[BaseMessage, str]): The input message for the
560
+ agent. If provided as a BaseMessage, the `role` is adjusted to
561
+ `user` to indicate an external message.
562
+ response_format (Optional[Type[BaseModel]], optional): A Pydantic
563
+ model defining the expected structure of the response. Used to
564
+ generate a structured response if provided. (default:
565
+ :obj:`None`)
566
+
567
+ Returns:
568
+ ChatAgentResponse: Contains output messages, a termination status
569
+ flag, and session information.
570
+ """
571
+
572
+ if (
573
+ self.model_backend.model_config_dict.get("response_format")
574
+ and response_format
575
+ ):
576
+ raise ValueError(
577
+ "The `response_format` parameter cannot be set both in "
578
+ "the model configuration and in the ChatAgent step."
579
+ )
580
+
581
+ self.original_model_dict = self.model_backend.model_config_dict
582
+ model_response_format_modified = False
583
+ if (
584
+ response_format
585
+ and self.model_type.support_native_structured_output
586
+ ):
587
+ self.model_backend.model_config_dict = (
588
+ self.original_model_dict.copy()
589
+ )
590
+ self.model_backend.model_config_dict["response_format"] = (
591
+ response_format
592
+ )
593
+ model_response_format_modified = True
594
+
595
+ # Convert input message to BaseMessage if necessary
596
+ if isinstance(input_message, str):
597
+ input_message = BaseMessage.make_user_message(
598
+ role_name='User', content=input_message
599
+ )
600
+
601
+ # Handle tool prompt injection if needed
602
+ if (
603
+ self.is_tools_added()
604
+ and not self.model_type.support_native_tool_calling
605
+ and not self.tool_prompt_added
606
+ ):
607
+ self._inject_tool_prompt()
608
+
609
+ # Add user input to memory
610
+ self.update_memory(input_message, OpenAIBackendRole.USER)
611
+
612
+ try:
613
+ return self._handle_step(response_format, self.single_iteration)
614
+ finally:
615
+ if model_response_format_modified:
616
+ # Reset model config back to original state
617
+ self.model_backend.model_config_dict = self.original_model_dict
618
+
619
+ def _inject_tool_prompt(self) -> None:
620
+ r"""Generate and add the tool prompt to memory."""
621
+ tool_prompt = self._generate_tool_prompt(
622
+ self.model_backend.model_config_dict["tools"]
623
+ )
624
+ tool_msg = BaseMessage.make_assistant_message(
625
+ role_name="Assistant", content=tool_prompt
626
+ )
627
+ self.update_memory(tool_msg, OpenAIBackendRole.SYSTEM)
628
+ self.tool_prompt_added = True
629
+
630
+ def _handle_step(
631
+ self,
632
+ response_format: Optional[Type[BaseModel]],
633
+ single_step: bool,
634
+ ) -> ChatAgentResponse:
635
+ r"""Handles a single or multi-step interaction."""
636
+
637
+ if (
638
+ self.model_backend.model_config_dict.get("tool_choice")
639
+ == "required"
640
+ and not single_step
641
+ ):
642
+ raise ValueError(
643
+ "`tool_choice` cannot be set to `required` for multi-step"
644
+ " mode. To proceed, set `single_iteration` to `True`."
645
+ )
646
+
647
+ # Record function calls made during the session
648
+ tool_call_records: List[FunctionCallingRecord] = []
649
+
650
+ external_tool_request = None
651
+
652
+ while True:
653
+ try:
654
+ openai_messages, num_tokens = self.memory.get_context()
655
+ except RuntimeError as e:
656
+ self.model_backend.model_config_dict = self.original_model_dict
657
+ return self._step_token_exceed(
658
+ e.args[1], tool_call_records, "max_tokens_exceeded"
659
+ )
660
+
661
+ # Prompt engineering approach for structured output for non-native tool calling models
662
+ inject_prompt_for_structured_output = (
663
+ response_format
664
+ and not self.model_type.support_native_structured_output
665
+ )
666
+
667
+ if inject_prompt_for_structured_output:
668
+ # update last openai message
669
+ usr_msg = openai_messages.pop()
670
+ usr_msg["content"] = generate_prompt_for_structured_output(
671
+ response_format,
672
+ usr_msg["content"], # type: ignore [arg-type]
673
+ )
674
+ openai_messages.append(usr_msg)
675
+
676
+ # Process model response
677
+ (
678
+ response,
679
+ output_messages,
680
+ finish_reasons,
681
+ usage_dict,
682
+ response_id,
683
+ ) = self._step_model_response(openai_messages, num_tokens)
684
+
685
+ # Try to parse structured output to return a Pydantic object
686
+ if inject_prompt_for_structured_output and isinstance(
687
+ response, ChatCompletion
688
+ ):
689
+ content = response.choices[0].message.content
690
+ try:
691
+ json_content = json.loads(str(content))
692
+ output_messages[0].parsed = response_format(**json_content) # type: ignore [assignment, misc]
693
+ except json.JSONDecodeError as e:
694
+ logger.error(
695
+ f"Failed in parsing the output into JSON: {e}"
696
+ )
697
+ output_messages[0].parsed = None
698
+ except ValidationError as e:
699
+ logger.warning(
700
+ "Successfully generating JSON response, "
701
+ "but failed in parsing it into Pydantic object :"
702
+ f"{e}, return the JSON response in parsed field"
703
+ )
704
+ output_messages[0].parsed = json_content
705
+
706
+ # Finalize on standard response in multi-step mode
707
+ if self._is_standard_response(response):
708
+ break
709
+
710
+ # Handle tool requests
711
+ tool_request = self._extract_tool_call(response)
712
+ if isinstance(response, ChatCompletion) and tool_request:
713
+ response.choices[0].message.tool_calls = [tool_request]
714
+ tool_call_records.append(
715
+ self._step_tool_call_and_update(response)
716
+ )
717
+
718
+ if tool_request.function.name in self.external_tool_names:
719
+ external_tool_request = tool_request
720
+ info = self._step_get_info(
721
+ output_messages,
722
+ finish_reasons,
723
+ usage_dict,
724
+ response_id,
725
+ tool_call_records,
726
+ num_tokens,
727
+ tool_request,
728
+ )
729
+ self._log_final_output(output_messages)
730
+ self.model_backend.model_config_dict = (
731
+ self.original_model_dict
732
+ )
733
+ return ChatAgentResponse(
734
+ msgs=output_messages,
735
+ terminated=self.terminated,
736
+ info=info,
737
+ )
738
+
739
+ # Single-step mode ends after one iteration
740
+ if single_step:
741
+ break
742
+
743
+ # Optional structured output via function calling
744
+ if (
745
+ response_format
746
+ and not inject_prompt_for_structured_output
747
+ and self.model_type
748
+ not in {
749
+ "gpt-4o",
750
+ "gpt-4o-mini",
751
+ }
752
+ ):
753
+ (
754
+ output_messages,
755
+ finish_reasons,
756
+ usage_dict,
757
+ response_id,
758
+ tool_call,
759
+ num_tokens,
760
+ ) = self._structure_output_with_function(response_format)
761
+ tool_call_records.append(tool_call)
762
+
763
+ # Final info and response
764
+ info = self._step_get_info(
765
+ output_messages,
766
+ finish_reasons,
767
+ usage_dict,
768
+ response_id,
769
+ tool_call_records,
770
+ num_tokens,
771
+ external_tool_request,
772
+ )
773
+ self._log_final_output(output_messages)
774
+ self.model_backend.model_config_dict = self.original_model_dict
775
+ return ChatAgentResponse(
776
+ msgs=output_messages, terminated=self.terminated, info=info
777
+ )
778
+
779
+ def _extract_tool_call(
780
+ self, response: Any
781
+ ) -> Optional[ChatCompletionMessageToolCall]:
782
+ r"""Extract the tool call from the model response, if present.
783
+
784
+ Args:
785
+ response (Any): The model's response object.
786
+
787
+ Returns:
788
+ Optional[ChatCompletionMessageToolCall]: The parsed tool call if
789
+ present, otherwise None.
790
+ """
791
+ # Check if the response contains tool calls
792
+ if (
793
+ self.is_tools_added()
794
+ and not self.model_type.support_native_tool_calling
795
+ and "</function>" in response.choices[0].message.content
796
+ ):
797
+ parsed_content = self._parse_tool_response(
798
+ response.choices[0].message.content
799
+ )
800
+ if parsed_content:
801
+ return ChatCompletionMessageToolCall(
802
+ id=str(uuid.uuid4()),
803
+ function=Function(
804
+ arguments=str(parsed_content["arguments"]).replace(
805
+ "'", '"'
806
+ ),
807
+ name=str(parsed_content["function"]),
808
+ ),
809
+ type="function",
810
+ )
811
+ elif (
812
+ self.is_tools_added()
813
+ and self.model_type.support_native_tool_calling
814
+ and response.choices[0].message.tool_calls
815
+ ):
816
+ return response.choices[0].message.tool_calls[0]
817
+
818
+ # No tool call found
819
+ return None
820
+
821
+ def _is_standard_response(self, response: Any) -> bool:
822
+ r"""Determine if the provided response is a standard reply without
823
+ tool calls.
824
+
825
+ Args:
826
+ response (Any): The response object to evaluate.
827
+
828
+ Returns:
829
+ bool: `True` if the response is a standard reply, `False`
830
+ otherwise.
831
+ """
832
+ if not self.is_tools_added():
833
+ return True
834
+
835
+ if not isinstance(response, ChatCompletion):
836
+ return True
837
+
838
+ if self.model_type.support_native_tool_calling:
839
+ return not response.choices[0].message.tool_calls
840
+
841
+ return "</function>" not in str(
842
+ response.choices[0].message.content or ""
843
+ )
844
+
845
+ def _log_final_output(self, output_messages: List[BaseMessage]) -> None:
846
+ r"""Log final messages or warnings about multiple responses."""
847
+ if len(output_messages) == 1:
848
+ self.record_message(output_messages[0])
849
+ else:
850
+ logger.warning(
851
+ "Multiple messages returned in `step()`. Record "
852
+ "selected message manually using `record_message()`."
853
+ )
854
+
855
+ async def step_async(
856
+ self,
857
+ input_message: Union[BaseMessage, str],
858
+ response_format: Optional[Type[BaseModel]] = None,
859
+ ) -> ChatAgentResponse:
860
+ r"""Performs a single step in the chat session by generating a response
861
+ to the input message. This agent step can call async function calls.
862
+
863
+ Args:
864
+ input_message (Union[BaseMessage, str]): The input message to the
865
+ agent. For BaseMessage input, its `role` field that specifies
866
+ the role at backend may be either `user` or `assistant` but it
867
+ will be set to `user` anyway since for the self agent any
868
+ incoming message is external. For str input, the `role_name`
869
+ would be `User`.
870
+ response_format (Optional[Type[BaseModel]], optional): A pydantic
871
+ model class that includes value types and field descriptions
872
+ used to generate a structured response by LLM. This schema
873
+ helps in defining the expected output format. (default:
874
+ :obj:`None`)
875
+
876
+ Returns:
877
+ ChatAgentResponse: A struct containing the output messages,
878
+ a boolean indicating whether the chat session has terminated,
879
+ and information about the chat session.
880
+ """
881
+ if isinstance(input_message, str):
882
+ input_message = BaseMessage.make_user_message(
883
+ role_name='User', content=input_message
884
+ )
885
+
886
+ self.update_memory(input_message, OpenAIBackendRole.USER)
887
+
888
+ tool_call_records: List[FunctionCallingRecord] = []
889
+ while True:
890
+ try:
891
+ openai_messages, num_tokens = self.memory.get_context()
892
+ except RuntimeError as e:
893
+ return self._step_token_exceed(
894
+ e.args[1], tool_call_records, "max_tokens_exceeded"
895
+ )
896
+
897
+ (
898
+ response,
899
+ output_messages,
900
+ finish_reasons,
901
+ usage_dict,
902
+ response_id,
903
+ ) = self._step_model_response(openai_messages, num_tokens)
904
+
905
+ if (
906
+ not self.is_tools_added()
907
+ or not isinstance(response, ChatCompletion)
908
+ or not response.choices[0].message.tool_calls
909
+ ):
910
+ break
911
+
912
+ # Check for external tool call
913
+ external_tool_request = response.choices[0].message.tool_calls[0]
914
+ if external_tool_request.function.name in self.external_tool_names:
915
+ # if model calls an external tool, directly return the request
916
+ info = self._step_get_info(
917
+ output_messages,
918
+ finish_reasons,
919
+ usage_dict,
920
+ response_id,
921
+ tool_call_records,
922
+ num_tokens,
923
+ external_tool_request,
924
+ )
925
+ return ChatAgentResponse(
926
+ msgs=output_messages, terminated=self.terminated, info=info
927
+ )
928
+
929
+ # Normal function calling
930
+ tool_call_records.append(
931
+ await self._step_tool_call_and_update_async(response)
932
+ )
933
+
934
+ if (
935
+ response_format is not None
936
+ and self.model_type.support_native_tool_calling
937
+ ):
938
+ (
939
+ output_messages,
940
+ finish_reasons,
941
+ usage_dict,
942
+ response_id,
943
+ tool_call_record,
944
+ num_tokens,
945
+ ) = self._structure_output_with_function(response_format)
946
+ tool_call_records.append(tool_call_record)
947
+
948
+ info = self._step_get_info(
949
+ output_messages,
950
+ finish_reasons,
951
+ usage_dict,
952
+ response_id,
953
+ tool_call_records,
954
+ num_tokens,
955
+ )
956
+
957
+ if len(output_messages) == 1:
958
+ # Auto record if the output result is a single message
959
+ self.record_message(output_messages[0])
960
+ else:
961
+ logger.warning(
962
+ "Multiple messages returned in `step()`, message won't be "
963
+ "recorded automatically. Please call `record_message()` to "
964
+ "record the selected message manually."
965
+ )
966
+
967
+ return ChatAgentResponse(
968
+ msgs=output_messages, terminated=self.terminated, info=info
969
+ )
970
+
971
+ def _step_tool_call_and_update(
972
+ self, response: ChatCompletion
973
+ ) -> FunctionCallingRecord:
974
+ r"""Processes a function call within the chat completion response,
975
+ records the function call in the provided list of tool calls and
976
+ updates the memory of the current agent.
977
+
978
+ Args:
979
+ response (ChatCompletion): The response object from the chat
980
+ completion.
981
+
982
+ Returns:
983
+ FunctionCallingRecord: The record of calling the function.
984
+ """
985
+
986
+ # Perform function calling
987
+ func_assistant_msg, func_result_msg, tool_call_record = (
988
+ self._step_tool_call(response)
989
+ )
990
+
991
+ # Update the messages
992
+ self.update_memory(func_assistant_msg, OpenAIBackendRole.ASSISTANT)
993
+ self.update_memory(func_result_msg, OpenAIBackendRole.FUNCTION)
994
+
995
+ return tool_call_record
996
+
997
+ async def _step_tool_call_and_update_async(
998
+ self, response: ChatCompletion
999
+ ) -> FunctionCallingRecord:
1000
+ (
1001
+ func_assistant_msg,
1002
+ func_result_msg,
1003
+ func_record,
1004
+ ) = await self.step_tool_call_async(response)
1005
+
1006
+ self.update_memory(func_assistant_msg, OpenAIBackendRole.ASSISTANT)
1007
+ self.update_memory(func_result_msg, OpenAIBackendRole.FUNCTION)
1008
+
1009
+ return func_record
1010
+
1011
+ def _structure_output_with_function(
1012
+ self, response_format: Type[BaseModel]
1013
+ ) -> Tuple[
1014
+ List[BaseMessage],
1015
+ List[str],
1016
+ Dict[str, int],
1017
+ str,
1018
+ FunctionCallingRecord,
1019
+ int,
1020
+ ]:
1021
+ r"""Internal function of structuring the output of the agent based on
1022
+ the given output schema.
1023
+
1024
+ Args:
1025
+ response_format (Type[BaseModel]): The output schema to use for
1026
+ structuring the output.
1027
+
1028
+ Returns:
1029
+ Tuple[List[BaseMessage], List[str], Dict[str, int], str,
1030
+ FunctionCallingRecord, int]:
1031
+ A tuple containing the output messages, finish reasons, usage
1032
+ dictionary, response ID, function calling record, and number of
1033
+ tokens.
1034
+ """
1035
+ from camel.toolkits import FunctionTool
1036
+
1037
+ schema_json = get_pydantic_object_schema(response_format)
1038
+ func_str = json_to_function_code(schema_json)
1039
+ func_callable = func_string_to_callable(func_str)
1040
+ func = FunctionTool(func_callable)
1041
+
1042
+ original_model_dict = self.model_backend.model_config_dict
1043
+
1044
+ # Replace the original tools with the structuring function
1045
+ self.tool_dict = {func.get_function_name(): func}
1046
+ self.model_backend.model_config_dict = original_model_dict.copy()
1047
+ self.model_backend.model_config_dict["tools"] = [
1048
+ func.get_openai_tool_schema()
1049
+ ]
1050
+ self.model_backend.model_config_dict["tool_choice"] = "required"
1051
+
1052
+ openai_messages, num_tokens = self.memory.get_context()
1053
+ (
1054
+ response,
1055
+ output_messages,
1056
+ finish_reasons,
1057
+ usage_dict,
1058
+ response_id,
1059
+ ) = self._step_model_response(openai_messages, num_tokens)
1060
+
1061
+ if isinstance(response, ChatCompletion):
1062
+ tool_call_record = self._step_tool_call_and_update(response)
1063
+ else:
1064
+ raise ValueError(
1065
+ "Structured output is not supported for stream responses."
1066
+ )
1067
+
1068
+ for base_message_item in output_messages:
1069
+ base_message_item.content = json.dumps(tool_call_record.result)
1070
+
1071
+ # Recover the original tools
1072
+ self.model_backend.model_config_dict = original_model_dict
1073
+
1074
+ return (
1075
+ output_messages,
1076
+ finish_reasons,
1077
+ usage_dict,
1078
+ response_id,
1079
+ tool_call_record,
1080
+ num_tokens,
1081
+ )
1082
+
1083
+ def _step_model_response(
1084
+ self,
1085
+ openai_messages: List[OpenAIMessage],
1086
+ num_tokens: int,
1087
+ ) -> tuple[
1088
+ Union[ChatCompletion, Stream],
1089
+ List[BaseMessage],
1090
+ List[str],
1091
+ Dict[str, int],
1092
+ str,
1093
+ ]:
1094
+ r"""Internal function for agent step model response."""
1095
+
1096
+ response = None
1097
+ # Obtain the model's response
1098
+ for _ in range(len(self.model_backend.models)):
1099
+ try:
1100
+ response = self.model_backend.run(openai_messages)
1101
+ break
1102
+ except Exception as exc:
1103
+ logger.error(
1104
+ f"An error occurred while running model "
1105
+ f"{self.model_backend.model_type}, "
1106
+ f"index: {self.model_backend.current_model_index}",
1107
+ exc_info=exc,
1108
+ )
1109
+ continue
1110
+ if not response:
1111
+ raise ModelProcessingError(
1112
+ "Unable to process messages: none of the provided models "
1113
+ "run succesfully."
1114
+ )
1115
+
1116
+ logger.info(
1117
+ f"Model {self.model_backend.model_type}, "
1118
+ f"index {self.model_backend.current_model_index}, "
1119
+ f"processed these messages: {openai_messages}"
1120
+ )
1121
+
1122
+ if isinstance(response, ChatCompletion):
1123
+ output_messages, finish_reasons, usage_dict, response_id = (
1124
+ self.handle_batch_response(response)
1125
+ )
1126
+ else:
1127
+ output_messages, finish_reasons, usage_dict, response_id = (
1128
+ self.handle_stream_response(response, num_tokens)
1129
+ )
1130
+ return (
1131
+ response,
1132
+ output_messages,
1133
+ finish_reasons,
1134
+ usage_dict,
1135
+ response_id,
1136
+ )
1137
+
1138
+ def _step_get_info(
1139
+ self,
1140
+ output_messages: List[BaseMessage],
1141
+ finish_reasons: List[str],
1142
+ usage_dict: Dict[str, int],
1143
+ response_id: str,
1144
+ tool_calls: List[FunctionCallingRecord],
1145
+ num_tokens: int,
1146
+ external_tool_request: Optional[ChatCompletionMessageToolCall] = None,
1147
+ ) -> Dict[str, Any]:
1148
+ r"""Process the output of a chat step and gather information about the
1149
+ step.
1150
+
1151
+ This method checks for termination conditions, updates the agent's
1152
+ state, and collects information about the chat step, including tool
1153
+ calls and termination reasons.
1154
+
1155
+ Args:
1156
+ output_messages (List[BaseMessage]): The messages generated in
1157
+ this step.
1158
+ finish_reasons (List[str]): The reasons for finishing the
1159
+ generation for each message.
1160
+ usage_dict (Dict[str, int]): Dictionary containing token usage
1161
+ information.
1162
+ response_id (str): The ID of the response from the model.
1163
+ tool_calls (List[FunctionCallingRecord]): Records of function calls
1164
+ made during this step.
1165
+ num_tokens (int): The number of tokens used in this step.
1166
+ external_tool_request (Optional[ChatCompletionMessageToolCall]):
1167
+ Any external tool request made during this step.
1168
+ (default: :obj:`None`)
1169
+
1170
+ Returns:
1171
+ Dict[str, Any]: A dictionary containing information about the chat
1172
+ step, including termination status, reasons, and tool call
1173
+ information.
1174
+
1175
+ Note:
1176
+ This method iterates over all response terminators and checks if
1177
+ any of them signal termination. If a terminator signals
1178
+ termination, the agent's state is updated accordingly, and the
1179
+ termination reason is recorded.
1180
+ """
1181
+ termination = [
1182
+ terminator.is_terminated(output_messages)
1183
+ for terminator in self.response_terminators
1184
+ ]
1185
+ # Terminate the agent if any of the terminator terminates
1186
+ self.terminated, termination_reason = next(
1187
+ (
1188
+ (terminated, termination_reason)
1189
+ for terminated, termination_reason in termination
1190
+ if terminated
1191
+ ),
1192
+ (False, None),
1193
+ )
1194
+ # For now only retain the first termination reason
1195
+ if self.terminated and termination_reason is not None:
1196
+ finish_reasons = [termination_reason] * len(finish_reasons)
1197
+
1198
+ info = self.get_info(
1199
+ response_id,
1200
+ usage_dict,
1201
+ finish_reasons,
1202
+ num_tokens,
1203
+ tool_calls,
1204
+ external_tool_request,
1205
+ )
1206
+ return info
1207
+
1208
+ def handle_batch_response(
1209
+ self, response: ChatCompletion
1210
+ ) -> Tuple[List[BaseMessage], List[str], Dict[str, int], str]:
1211
+ r"""Process a batch response from the model and extract the necessary
1212
+ information.
1213
+
1214
+ Args:
1215
+ response (ChatCompletion): Model response.
1216
+
1217
+ Returns:
1218
+ tuple: A tuple of list of output `ChatMessage`, list of
1219
+ finish reasons, usage dictionary, and response id.
1220
+ """
1221
+ output_messages: List[BaseMessage] = []
1222
+ for choice in response.choices:
1223
+ chat_message = BaseMessage(
1224
+ role_name=self.role_name,
1225
+ role_type=self.role_type,
1226
+ meta_dict=dict(),
1227
+ content=choice.message.content or "",
1228
+ parsed=getattr(choice.message, 'parsed', None),
1229
+ )
1230
+ # Process log probabilities and append to the message meta information
1231
+ if choice.logprobs is not None:
1232
+ tokens_logprobs = choice.logprobs.content
1233
+
1234
+ if tokens_logprobs is not None:
1235
+ # Extract and structure logprob information
1236
+ logprobs_info = [
1237
+ {
1238
+ "token": token_logprob.token,
1239
+ "logprob": token_logprob.logprob,
1240
+ "top_logprobs": [
1241
+ (top_logprob.token, top_logprob.logprob)
1242
+ for top_logprob in token_logprob.top_logprobs
1243
+ ],
1244
+ }
1245
+ for token_logprob in tokens_logprobs
1246
+ ]
1247
+ # Ensure meta_dict exists before adding logprobs info
1248
+ if chat_message.meta_dict is None:
1249
+ chat_message.meta_dict = {}
1250
+ chat_message.meta_dict["logprobs_info"] = logprobs_info
1251
+ # Append the processed chat message to output
1252
+ output_messages.append(chat_message)
1253
+
1254
+ finish_reasons = [
1255
+ str(choice.finish_reason) for choice in response.choices
1256
+ ]
1257
+ usage = (
1258
+ self._safe_model_dump(response.usage)
1259
+ if response.usage is not None
1260
+ else {}
1261
+ )
1262
+ return (
1263
+ output_messages,
1264
+ finish_reasons,
1265
+ usage,
1266
+ response.id,
1267
+ )
1268
+
1269
+ def _safe_model_dump(self, obj) -> dict:
1270
+ r"""Safely dump a Pydantic model to a dictionary.
1271
+
1272
+ This method attempts to use the `model_dump` method if available,
1273
+ otherwise it falls back to the `dict` method.
1274
+
1275
+ Args:
1276
+ obj: The Pydantic model instance to be dumped.
1277
+
1278
+ Returns:
1279
+ dict: A dictionary representation of the Pydantic model.
1280
+ """
1281
+ # Check if the `model_dump` method exists (Pydantic v2)
1282
+ if hasattr(obj, 'model_dump'):
1283
+ return obj.model_dump()
1284
+ # Fallback to `dict()` method (Pydantic v1)
1285
+ elif hasattr(obj, 'dict'):
1286
+ return obj.dict()
1287
+ else:
1288
+ raise TypeError("The object is not a Pydantic model")
1289
+
1290
+ def handle_stream_response(
1291
+ self,
1292
+ response: Stream[ChatCompletionChunk],
1293
+ prompt_tokens: int,
1294
+ ) -> Tuple[List[BaseMessage], List[str], Dict[str, int], str]:
1295
+ r"""Process a stream response from the model and extract the necessary
1296
+ information.
1297
+
1298
+ Args:
1299
+ response (Stream[ChatCompletionChunk]): Model response.
1300
+ prompt_tokens (int): Number of input prompt tokens.
1301
+
1302
+ Returns:
1303
+ tuple: A tuple of list of output `ChatMessage`, list of
1304
+ finish reasons, usage dictionary, and response id.
1305
+ """
1306
+ content_dict: defaultdict = defaultdict(lambda: "")
1307
+ finish_reasons_dict: defaultdict = defaultdict(lambda: "")
1308
+ output_messages: List[BaseMessage] = []
1309
+ response_id: str = ""
1310
+ # All choices in one response share one role
1311
+ for chunk in response:
1312
+ response_id = chunk.id
1313
+ for choice in chunk.choices:
1314
+ index = choice.index
1315
+ delta = choice.delta
1316
+ if delta.content is not None:
1317
+ # When response has not been stopped
1318
+ # Notice that only the first chunk_dict has the "role"
1319
+ content_dict[index] += delta.content
1320
+ if choice.finish_reason:
1321
+ finish_reasons_dict[index] = choice.finish_reason
1322
+ chat_message = BaseMessage(
1323
+ role_name=self.role_name,
1324
+ role_type=self.role_type,
1325
+ meta_dict=dict(),
1326
+ content=content_dict[index],
1327
+ )
1328
+ output_messages.append(chat_message)
1329
+ finish_reasons = [
1330
+ finish_reasons_dict[i] for i in range(len(finish_reasons_dict))
1331
+ ]
1332
+ usage_dict = self.get_usage_dict(output_messages, prompt_tokens)
1333
+ return output_messages, finish_reasons, usage_dict, response_id
1334
+
1335
+ def _step_token_exceed(
1336
+ self,
1337
+ num_tokens: int,
1338
+ tool_calls: List[FunctionCallingRecord],
1339
+ termination_reason: str,
1340
+ ) -> ChatAgentResponse:
1341
+ r"""Return trivial response containing number of tokens and information
1342
+ of called functions when the number of tokens exceeds the limit.
1343
+
1344
+ Args:
1345
+ num_tokens (int): Number of tokens in the messages.
1346
+ tool_calls (List[FunctionCallingRecord]): List of information
1347
+ objects of functions called in the current step.
1348
+ termination_reason (str): String of termination reason.
1349
+
1350
+ Returns:
1351
+ ChatAgentResponse: The struct containing trivial outputs and
1352
+ information about token number and called functions.
1353
+ """
1354
+ self.terminated = True
1355
+ output_messages: List[BaseMessage] = []
1356
+
1357
+ info = self.get_info(
1358
+ None,
1359
+ None,
1360
+ [termination_reason],
1361
+ num_tokens,
1362
+ tool_calls,
1363
+ )
1364
+
1365
+ return ChatAgentResponse(
1366
+ msgs=output_messages,
1367
+ terminated=self.terminated,
1368
+ info=info,
1369
+ )
1370
+
1371
+ def _step_tool_call(
1372
+ self,
1373
+ response: ChatCompletion,
1374
+ ) -> Tuple[
1375
+ FunctionCallingMessage, FunctionCallingMessage, FunctionCallingRecord
1376
+ ]:
1377
+ r"""Execute the function with arguments following the model's response.
1378
+
1379
+ Args:
1380
+ response (Dict[str, Any]): The response obtained by calling the
1381
+ model.
1382
+
1383
+ Returns:
1384
+ tuple: A tuple consisting of two obj:`FunctionCallingMessage`,
1385
+ one about the arguments and the other about the execution
1386
+ result, and a struct for logging information about this
1387
+ function call.
1388
+ """
1389
+ choice = response.choices[0]
1390
+ if choice.message.tool_calls is None:
1391
+ raise RuntimeError("Tool call is None")
1392
+ func_name = choice.message.tool_calls[0].function.name
1393
+
1394
+ arguments_str = choice.message.tool_calls[0].function.arguments
1395
+ args = self._safe_json_loads(arguments_str)
1396
+
1397
+ tool = self.tool_dict[func_name]
1398
+ result = tool(**args)
1399
+ tool_call_id = choice.message.tool_calls[0].id
1400
+
1401
+ assist_msg = FunctionCallingMessage(
1402
+ role_name=self.role_name,
1403
+ role_type=self.role_type,
1404
+ meta_dict=None,
1405
+ content="",
1406
+ func_name=func_name,
1407
+ args=args,
1408
+ tool_call_id=tool_call_id,
1409
+ )
1410
+ func_msg = FunctionCallingMessage(
1411
+ role_name=self.role_name,
1412
+ role_type=self.role_type,
1413
+ meta_dict=None,
1414
+ content="",
1415
+ func_name=func_name,
1416
+ result=result,
1417
+ tool_call_id=tool_call_id,
1418
+ )
1419
+
1420
+ # Record information about this function call
1421
+ func_record = FunctionCallingRecord(
1422
+ func_name=func_name,
1423
+ args=args,
1424
+ result=result,
1425
+ tool_call_id=tool_call_id,
1426
+ )
1427
+ return assist_msg, func_msg, func_record
1428
+
1429
+ def _safe_json_loads(self, arguments_str):
1430
+ # Replace Python types with their JSON equivalents
1431
+ arguments_str = arguments_str.replace("None", "null")
1432
+ arguments_str = arguments_str.replace("True", "true")
1433
+ arguments_str = arguments_str.replace("False", "false")
1434
+
1435
+ # Attempt to parse the corrected string
1436
+ try:
1437
+ return json.loads(arguments_str)
1438
+ except json.JSONDecodeError as e:
1439
+ raise ValueError(f"Invalid JSON format: {e}")
1440
+
1441
+ async def step_tool_call_async(
1442
+ self,
1443
+ response: ChatCompletion,
1444
+ ) -> Tuple[
1445
+ FunctionCallingMessage, FunctionCallingMessage, FunctionCallingRecord
1446
+ ]:
1447
+ r"""Execute the async function with arguments following the model's
1448
+ response.
1449
+
1450
+ Args:
1451
+ response (Dict[str, Any]): The response obtained by calling the
1452
+ model.
1453
+
1454
+ Returns:
1455
+ tuple: A tuple consisting of two obj:`FunctionCallingMessage`,
1456
+ one about the arguments and the other about the execution
1457
+ result, and a struct for logging information about this
1458
+ function call.
1459
+ """
1460
+ # Note that when function calling is enabled, `n` is set to 1.
1461
+ choice = response.choices[0]
1462
+ if choice.message.tool_calls is None:
1463
+ raise RuntimeError("Tool call is None")
1464
+ func_name = choice.message.tool_calls[0].function.name
1465
+
1466
+ args = json.loads(choice.message.tool_calls[0].function.arguments)
1467
+ tool = self.tool_dict[func_name]
1468
+ result = await tool(**args)
1469
+ tool_call_id = choice.message.tool_calls[0].id
1470
+
1471
+ assist_msg = FunctionCallingMessage(
1472
+ role_name=self.role_name,
1473
+ role_type=self.role_type,
1474
+ meta_dict=None,
1475
+ content="",
1476
+ func_name=func_name,
1477
+ args=args,
1478
+ tool_call_id=tool_call_id,
1479
+ )
1480
+ func_msg = FunctionCallingMessage(
1481
+ role_name=self.role_name,
1482
+ role_type=self.role_type,
1483
+ meta_dict=None,
1484
+ content="",
1485
+ func_name=func_name,
1486
+ result=result,
1487
+ tool_call_id=tool_call_id,
1488
+ )
1489
+
1490
+ # Record information about this function call
1491
+ func_record = FunctionCallingRecord(
1492
+ func_name=func_name,
1493
+ args=args,
1494
+ result=result,
1495
+ tool_call_id=tool_call_id,
1496
+ )
1497
+ return assist_msg, func_msg, func_record
1498
+
1499
+ def get_usage_dict(
1500
+ self, output_messages: List[BaseMessage], prompt_tokens: int
1501
+ ) -> Dict[str, int]:
1502
+ r"""Get usage dictionary when using the stream mode.
1503
+
1504
+ Args:
1505
+ output_messages (list): List of output messages.
1506
+ prompt_tokens (int): Number of input prompt tokens.
1507
+
1508
+ Returns:
1509
+ dict: Usage dictionary.
1510
+ """
1511
+ encoding = get_model_encoding(self.model_type.value_for_tiktoken)
1512
+ completion_tokens = 0
1513
+ for message in output_messages:
1514
+ completion_tokens += len(encoding.encode(message.content))
1515
+ usage_dict = dict(
1516
+ completion_tokens=completion_tokens,
1517
+ prompt_tokens=prompt_tokens,
1518
+ total_tokens=completion_tokens + prompt_tokens,
1519
+ )
1520
+ return usage_dict
1521
+
1522
+ def add_model_scheduling_strategy(self, name: str, strategy_fn: Callable):
1523
+ r"""Add a scheduling strategy method provided by the user to ModelManager.
1524
+
1525
+ Args:
1526
+ name (str): The name of the strategy.
1527
+ strategy_fn (Callable): The scheduling strategy function.
1528
+ """
1529
+ self.model_backend.add_strategy(name, strategy_fn)
1530
+
1531
+ def __repr__(self) -> str:
1532
+ r"""Returns a string representation of the :obj:`ChatAgent`.
1533
+
1534
+ Returns:
1535
+ str: The string representation of the :obj:`ChatAgent`.
1536
+ """
1537
+ return (
1538
+ f"ChatAgent({self.role_name}, {self.role_type}, {self.model_type})"
1539
+ )
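A minimal standalone sketch of the `_safe_json_loads` behaviour defined above (illustrative only, not part of the committed file; it mirrors the naive literal replacement the method performs):

import json

def safe_json_loads(arguments_str: str) -> dict:
    # Mirror _safe_json_loads: rewrite Python literals into JSON literals,
    # then parse. Note that the naive replace would also touch occurrences
    # of "None"/"True"/"False" inside string values.
    arguments_str = (
        arguments_str.replace("None", "null")
        .replace("True", "true")
        .replace("False", "false")
    )
    try:
        return json.loads(arguments_str)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format: {e}")

# Example: tool-call arguments emitted with Python-style literals.
print(safe_json_loads('{"verbose": True, "limit": None}'))
# -> {'verbose': True, 'limit': None}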
Paper2Poster/camel/agents/critic_agent.py ADDED
@@ -0,0 +1,202 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import random
15
+ import warnings
16
+ from typing import Any, Dict, Optional, Sequence
17
+
18
+ from colorama import Fore
19
+
20
+ from camel.agents.chat_agent import ChatAgent
21
+ from camel.memories import AgentMemory
22
+ from camel.messages import BaseMessage
23
+ from camel.models import BaseModelBackend
24
+ from camel.responses import ChatAgentResponse
25
+ from camel.utils import get_first_int, print_text_animated
26
+
27
+ # AgentOps decorator setting
28
+ try:
29
+ import os
30
+
31
+ if os.getenv("AGENTOPS_API_KEY") is not None:
32
+ from agentops import track_agent
33
+ else:
34
+ raise ImportError
35
+ except (ImportError, AttributeError):
36
+ from camel.utils import track_agent
37
+
38
+
39
+ @track_agent(name="CriticAgent")
40
+ class CriticAgent(ChatAgent):
41
+ r"""A class for the critic agent that assists in selecting an option.
42
+
43
+ Args:
44
+ system_message (BaseMessage): The system message for the critic
45
+ agent.
46
+ model (BaseModelBackend, optional): The model backend to use for
47
+ generating responses. (default: :obj:`OpenAIModel` with
48
+ `GPT_4O_MINI`)
49
+ message_window_size (int, optional): The maximum number of previous
50
+ messages to include in the context window. If `None`, no windowing
51
+ is performed. (default: :obj:`6`)
52
+ retry_attempts (int, optional): The number of retry attempts if the
53
+ critic fails to return a valid option. (default: :obj:`2`)
54
+ verbose (bool, optional): Whether to print the critic's messages.
55
+ logger_color (Any): The color of the menu options displayed to the
56
+ user. (default: :obj:`Fore.MAGENTA`)
57
+ """
58
+
59
+ def __init__(
60
+ self,
61
+ system_message: BaseMessage,
62
+ model: Optional[BaseModelBackend] = None,
63
+ memory: Optional[AgentMemory] = None,
64
+ message_window_size: int = 6,
65
+ retry_attempts: int = 2,
66
+ verbose: bool = False,
67
+ logger_color: Any = Fore.MAGENTA,
68
+ ) -> None:
69
+ super().__init__(
70
+ system_message,
71
+ model=model,
72
+ memory=memory,
73
+ message_window_size=message_window_size,
74
+ )
75
+ self.options_dict: Dict[str, str] = dict()
76
+ self.retry_attempts = retry_attempts
77
+ self.verbose = verbose
78
+ self.logger_color = logger_color
79
+
80
+ def flatten_options(self, messages: Sequence[BaseMessage]) -> str:
81
+ r"""Flattens the options to the critic.
82
+
83
+ Args:
84
+ messages (Sequence[BaseMessage]): A list of `BaseMessage` objects.
85
+
86
+ Returns:
87
+ str: A string containing the flattened options to the critic.
88
+ """
89
+ options = [message.content for message in messages]
90
+ flatten_options = (
91
+ f"> Proposals from "
92
+ f"{messages[0].role_name} ({messages[0].role_type}). "
93
+ "Please choose an option:\n"
94
+ )
95
+ for index, option in enumerate(options):
96
+ flatten_options += f"Option {index + 1}:\n{option}\n\n"
97
+ self.options_dict[str(index + 1)] = option
98
+ format = (
99
+ f"Please first enter your choice ([1-{len(self.options_dict)}]) "
100
+ "and then your explanation and comparison: "
101
+ )
102
+ return flatten_options + format
103
+
104
+ def get_option(self, input_message: BaseMessage) -> str:
105
+ r"""Gets the option selected by the critic.
106
+
107
+ Args:
108
+ input_message (BaseMessage): A `BaseMessage` object representing
109
+ the input message.
110
+
111
+ Returns:
112
+ str: The option selected by the critic.
113
+ """
114
+ # TODO: Add support for editing options by the critic.
115
+ msg_content = input_message.content
116
+ i = 0
117
+ while i < self.retry_attempts:
118
+ critic_response = self.step(input_message)
119
+
120
+ if critic_response.msgs is None or len(critic_response.msgs) == 0:
121
+ raise RuntimeError("Got None critic messages.")
122
+ if critic_response.terminated:
123
+ raise RuntimeError("Critic step failed.")
124
+
125
+ critic_msg = critic_response.msg
126
+ if self.verbose:
127
+ print_text_animated(
128
+ self.logger_color + "\n> Critic response: "
129
+ f"\x1b[3m{critic_msg.content}\x1b[0m\n"
130
+ )
131
+ choice = self.parse_critic(critic_msg)
132
+
133
+ if choice in self.options_dict:
134
+ return self.options_dict[choice]
135
+ else:
136
+ input_message = BaseMessage(
137
+ role_name=input_message.role_name,
138
+ role_type=input_message.role_type,
139
+ meta_dict=input_message.meta_dict,
140
+ content="> Invalid choice. Please choose again.\n"
141
+ + msg_content,
142
+ )
143
+ i += 1
144
+ warnings.warn(
145
+ "Critic failed to get a valid option. "
146
+ f"After {self.retry_attempts} attempts. "
147
+ "Returning a random option."
148
+ )
149
+ return random.choice(list(self.options_dict.values()))
150
+
151
+ def parse_critic(self, critic_msg: BaseMessage) -> Optional[str]:
152
+ r"""Parses the critic's message and extracts the choice.
153
+
154
+ Args:
155
+ critic_msg (BaseMessage): A `BaseMessage` object representing the
156
+ critic's response.
157
+
158
+ Returns:
159
+ Optional[str]: The critic's choice as a string, or None if the
160
+ message could not be parsed.
161
+ """
162
+ choice = str(get_first_int(critic_msg.content))
163
+ return choice
164
+
165
+ def reduce_step(
166
+ self,
167
+ input_messages: Sequence[BaseMessage],
168
+ ) -> ChatAgentResponse:
169
+ r"""Performs one step of the conversation by flattening options to the
170
+ critic, getting the option, and parsing the choice.
171
+
172
+ Args:
173
+ input_messages (Sequence[BaseMessage]): A list of BaseMessage
174
+ objects.
175
+
176
+ Returns:
177
+ ChatAgentResponse: A `ChatAgentResponse` object includes the
178
+ critic's choice.
179
+ """
180
+ meta_chat_message = BaseMessage(
181
+ role_name=input_messages[0].role_name,
182
+ role_type=input_messages[0].role_type,
183
+ meta_dict=input_messages[0].meta_dict,
184
+ content="",
185
+ )
186
+
187
+ flatten_options = self.flatten_options(input_messages)
188
+ if self.verbose:
189
+ print_text_animated(
190
+ self.logger_color + f"\x1b[3m{flatten_options}\x1b[0m\n"
191
+ )
192
+ input_msg = meta_chat_message.create_new_instance(flatten_options)
193
+
194
+ option = self.get_option(input_msg)
195
+ output_msg = meta_chat_message.create_new_instance(option)
196
+
197
+ # TODO: The return `info` can be improved.
198
+ return ChatAgentResponse(
199
+ msgs=[output_msg],
200
+ terminated=False,
201
+ info={},
202
+ )
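An illustrative usage sketch for `CriticAgent.reduce_step` (not part of the committed file; it assumes the camel package is installed and a default model backend is configured via environment variables):

from camel.agents.critic_agent import CriticAgent
from camel.messages import BaseMessage
from camel.types import RoleType

def make_msg(content: str) -> BaseMessage:
    # Helper for building assistant proposals.
    return BaseMessage(
        role_name="Assistant",
        role_type=RoleType.ASSISTANT,
        meta_dict=None,
        content=content,
    )

critic = CriticAgent(
    system_message=make_msg("You select the best option."),
    verbose=True,
)
options = [
    make_msg("Plan A: survey related work first."),
    make_msg("Plan B: prototype the pipeline first."),
]
# reduce_step flattens the options, queries the critic, and returns its choice.
choice = critic.reduce_step(options)
print(choice.msg.content)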
Paper2Poster/camel/agents/deductive_reasoner_agent.py ADDED
@@ -0,0 +1,303 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import re
15
+ from typing import Dict, List, Optional, Union
16
+
17
+ from camel.agents.chat_agent import ChatAgent
18
+ from camel.logger import get_logger
19
+ from camel.messages import BaseMessage
20
+ from camel.models import BaseModelBackend
21
+ from camel.prompts import TextPrompt
22
+ from camel.types import RoleType
23
+
24
+ logger = get_logger(__name__)
25
+
26
+ # AgentOps decorator setting
27
+ try:
28
+ import os
29
+
30
+ if os.getenv("AGENTOPS_API_KEY") is not None:
31
+ from agentops import track_agent
32
+ else:
33
+ raise ImportError
34
+ except (ImportError, AttributeError):
35
+ from camel.utils import track_agent
36
+
37
+
38
+ @track_agent(name="DeductiveReasonerAgent")
39
+ class DeductiveReasonerAgent(ChatAgent):
40
+ r"""An agent responsible for deductive reasoning. Model of deductive
41
+ reasoning:
42
+ - L: A ⊕ C -> q * B
43
+ - A represents the known starting state.
44
+ - B represents the known target state.
45
+ - C represents the conditions required to transition from A to B.
46
+ - Q represents the quality or effectiveness of the transition from
47
+ A to B.
48
+ - L represents the path or process from A to B.
49
+
50
+ Args:
51
+ model (BaseModelBackend, optional): The model backend to use for
52
+ generating responses. (default: :obj:`OpenAIModel` with
53
+ `GPT_4O_MINI`)
54
+ """
55
+
56
+ def __init__(
57
+ self,
58
+ model: Optional[BaseModelBackend] = None,
59
+ ) -> None:
60
+ system_message = BaseMessage(
61
+ role_name="Insight Agent",
62
+ role_type=RoleType.ASSISTANT,
63
+ meta_dict=None,
64
+ content="You assign roles based on tasks.",
65
+ )
66
+ super().__init__(system_message, model=model)
67
+
68
+ def deduce_conditions_and_quality(
69
+ self,
70
+ starting_state: str,
71
+ target_state: str,
72
+ role_descriptions_dict: Optional[Dict[str, str]] = None,
73
+ ) -> Dict[str, Union[List[str], Dict[str, str]]]:
74
+ r"""Derives the conditions and quality from the starting state and the
75
+ target state based on the model of the deductive reasoning and the
76
+ knowledge base. It can optionally consider the roles involved in the
77
+ scenario, which allows tailoring the output more closely to the AI
78
+ agent's environment.
79
+
80
+ Args:
81
+ starting_state (str): The initial or starting state from which
82
+ conditions are deduced.
83
+ target_state (str): The target state of the task.
86
+ role_descriptions_dict (Optional[Dict[str, str]], optional): A
87
+ dictionary describing the roles involved in the scenario. This
88
+ is optional and can be used to provide a context for the
89
+ CAMEL's role-playing, enabling the generation of more relevant
90
+ and tailored conditions and quality assessments. This could be
91
+ generated using a `RoleAssignmentAgent()` or defined manually
92
+ by the user. (default: :obj:`None`)
93
+
94
+ Returns:
95
+ Dict[str, Union[List[str], Dict[str, str]]]: A dictionary with the
96
+ extracted data from the message. The dictionary contains three
97
+ keys:
98
+ - 'conditions': A dictionary where each key is a condition ID and
99
+ each value is the corresponding condition text.
100
+ - 'labels': A list of label strings extracted from the message.
101
+ - 'evaluate_quality': A string with the quality assessment extracted
102
+ from the message.
103
+ """
104
+ self.reset()
105
+
106
+ deduce_prompt = """You are a deductive reasoner. You are tasked to
107
+ complete the TASK based on the THOUGHT OF DEDUCTIVE REASONING, the
108
+ STARTING STATE A and the TARGET STATE B. You are given the CONTEXT
109
+ CONTENT to help you complete the TASK.
110
+ Your answer MUST strictly adhere to the structure of ANSWER TEMPLATE, ONLY
111
+ fill in the BLANKs, and DO NOT alter or modify any other part of the template
112
+
113
+ ===== MODELING OF DEDUCTIVE REASONING =====
114
+ You are tasked with understanding a mathematical model based on the components
115
+ ${A, B, C, Q, L}$. In this model: ``L: A ⊕ C -> q * B``.
116
+ - $A$ represents the known starting state.
117
+ - $B$ represents the known target state.
118
+ - $C$ represents the conditions required to transition from $A$ to $B$.
119
+ - $Q$ represents the quality or effectiveness of the transition from $A$ to
120
+ $B$.
121
+ - $L$ represents the path or process from $A$ to $B$.
122
+
123
+ ===== THOUGHT OF DEDUCTIVE REASONING =====
124
+ 1. Define the Parameters of A and B:
125
+ - Characterization: Before delving into transitions, thoroughly understand
126
+ the nature and boundaries of both $A$ and $B$. This includes the type,
127
+ properties, constraints, and possible interactions between the two.
128
+ - Contrast and Compare: Highlight the similarities and differences between
129
+ $A$ and $B$. This comparative analysis will give an insight into what
130
+ needs changing and what remains constant.
131
+ 2. Historical & Empirical Analysis:
132
+ - Previous Transitions according to the Knowledge Base of GPT: (if
133
+ applicable) Extract conditions and patterns from the historical instances
134
+ where a similar transition from a state comparable to $A$ moved towards
135
+ $B$.
136
+ - Scientific Principles: (if applicable) Consider the underlying
137
+ scientific principles governing or related to the states and their
138
+ transition. For example, if $A$ and $B$ are physical states, laws of
139
+ physics might apply.
140
+ 3. Logical Deduction of Conditions ($C$):
141
+ - Direct Path Analysis: What are the immediate and direct conditions
142
+ required to move from $A$ to $B$?
143
+ - Intermediate States: Are there states between $A$ and $B$ that must be
144
+ traversed or can be used to make the transition smoother or more
145
+ efficient? If yes, what is the content?
146
+ - Constraints & Limitations: Identify potential barriers or restrictions
147
+ in moving from $A$ to $B$. These can be external (e.g., environmental
148
+ factors) or internal (properties of $A$ or $B$).
149
+ - Resource and Information Analysis: What resources and information are
150
+ required for the transition? This could be time, entity, factor, code
151
+ language, software platform, unknowns, etc.
152
+ - External Influences: Consider socio-economic, political, or
153
+ environmental factors (if applicable) that could influence the transition
154
+ conditions.
155
+ - Creative/Heuristic Reasoning: Open your mind to multiple possible $C$'s,
156
+ no matter how unconventional they might seem. Utilize analogies,
157
+ metaphors, or brainstorming techniques to envision possible conditions or
158
+ paths from $A$ to $B$.
159
+ - The conditions $C$ should be multiple but in one sentence. And each
160
+ condition should be concerned with one aspect/entity.
161
+ 4. Entity/Label Recognition of Conditions ($C$):
162
+ - Identify and categorize entities of Conditions ($C$) such as the names,
163
+ locations, dates, specific technical terms or contextual parameters that
164
+ might be associated with events, innovations post-2022.
165
+ - The output of the entities/labels will be used as tags or labels for
166
+ semantic similarity searches. The entities/labels may be the words, or
167
+ phrases, each of them should contain valuable, high information entropy
168
+ information, and should be independent.
169
+ - Ensure that the identified entities are formatted in a manner suitable
170
+ for database indexing and retrieval. Organize the entities into
171
+ categories, and combine the category with its instance into a continuous
172
+ phrase, without using colons or other separators.
173
+ - Format these entities for database indexing: output the category rather
174
+ than its instance/content into a continuous phrase. For example, instead
175
+ of "Jan. 02", identify it as "Event time".
176
+ 5. Quality Assessment ($Q$):
177
+ - Efficiency: How efficient is the transition from $A$ to $B$, which
178
+ measures the resources used versus the desired outcome?
179
+ - Effectiveness: Did the transition achieve the desired outcome or was the
180
+ target state achieved as intended?
181
+ - Safety & Risks: Assess any risks associated with the transition and the
182
+ measures to mitigate them.
183
+ - Feedback Mechanisms: Incorporate feedback loops to continuously monitor
184
+ and adjust the quality of transition, making it more adaptive.
185
+ 6. Iterative Evaluation:
186
+ - Test & Refine: Based on the initially deduced conditions and assessed
187
+ quality, iterate the process to refine and optimize the transition. This
188
+ might involve tweaking conditions, employing different paths, or changing
189
+ resources.
190
+ - Feedback Integration: Use feedback to make improvements and increase the
191
+ quality of the transition.
192
+ 7. Real-world scenarios often present challenges that may not be captured by
193
+ models and frameworks. While using the model, maintain an adaptive mindset:
194
+ - Scenario Exploration: Continuously imagine various possible scenarios,
195
+ both positive and negative, to prepare for unexpected events.
196
+ - Flexibility: Be prepared to modify conditions ($C$) or alter the path/
197
+ process ($L$) if unforeseen challenges arise.
198
+ - Feedback Integration: Rapidly integrate feedback from actual
199
+ implementations to adjust the model's application, ensuring relevancy and
200
+ effectiveness.
201
+
202
+ ===== TASK =====
203
+ Given the starting state $A$ and the target state $B$, assuming that a path
204
+ $L$ always exists between $A$ and $B$, how can one deduce or identify the
205
+ necessary conditions $C$ and the quality $Q$ of the transition?
206
+
207
+ ===== STARTING STATE $A$ =====
208
+ {starting_state}
209
+
210
+ ===== TARGET STATE $B$ =====
211
+ {target_state}
212
+
213
+ {role_with_description_prompt}
214
+ ===== ANSWER TEMPLATE =====
215
+ - Characterization and comparison of $A$ and $B$:\n<BLANK>
216
+ - Historical & Empirical Analysis:\n<BLANK>/None
217
+ - Logical Deduction of Conditions ($C$) (multiple conditions can be deduced):
218
+ condition <NUM>:
219
+ <BLANK>.
220
+ - Entity/Label Recognition of Conditions:\n[<BLANK>, <BLANK>, ...] (include
221
+ square brackets)
222
+ - Quality Assessment ($Q$) (do not use symbols):
223
+ <BLANK>.
224
+ - Iterative Evaluation:\n<BLANK>/None"""
225
+
226
+ if role_descriptions_dict is not None:
227
+ role_names = role_descriptions_dict.keys()
228
+ role_with_description_prompt = (
229
+ "===== ROLES WITH DESCRIPTIONS =====\n"
230
+ + "\n".join(
231
+ f"{role_name}:\n{role_descriptions_dict[role_name]}\n"
232
+ for role_name in role_names
233
+ )
234
+ + "\n\n"
235
+ )
236
+ else:
237
+ role_with_description_prompt = ""
238
+ deduce_prompt = TextPrompt(deduce_prompt)
239
+
240
+ deduce = deduce_prompt.format(
241
+ starting_state=starting_state,
242
+ target_state=target_state,
243
+ role_with_description_prompt=role_with_description_prompt,
244
+ )
245
+
246
+ conditions_and_quality_generation_msg = BaseMessage.make_user_message(
247
+ role_name="Deductive Reasoner", content=deduce
248
+ )
249
+
250
+ response = self.step(
251
+ input_message=conditions_and_quality_generation_msg
252
+ )
253
+
254
+ if response.terminated:
255
+ raise RuntimeError(
256
+ "Deduction failed. Error:\n" + f"{response.info}"
257
+ )
258
+ msg: BaseMessage = response.msg
259
+ logger.info(f"Message content:\n{msg.content}")
260
+
261
+ # Extract the conditions from the message
262
+ conditions_dict = {
263
+ f"condition {i}": cdt.replace("<", "")
264
+ .replace(">", "")
265
+ .strip()
266
+ .strip('\n')
267
+ for i, cdt in re.findall(
268
+ r"condition (\d+):\s*(.+?)(?=condition \d+|- Entity)",
269
+ msg.content,
270
+ re.DOTALL,
271
+ )
272
+ }
273
+
274
+ # Extract the labels from the message
275
+ labels = [
276
+ label.strip().strip('\n').strip("\"'")
277
+ for label in re.findall(
278
+ r"Entity/Label Recognition of Conditions:\n\[(.+?)\]",
279
+ msg.content,
280
+ re.DOTALL,
281
+ )[0].split(",")
282
+ ]
283
+
284
+ # Extract the quality from the message
285
+ quality = next(
286
+ q.strip().strip('\n')
287
+ for q in re.findall(
288
+ r"Quality Assessment \(\$Q\$\) \(do not use symbols\):"
289
+ r"\n(.+?)- Iterative",
290
+ msg.content,
291
+ re.DOTALL,
292
+ )
293
+ )
294
+
295
+ # Convert them into JSON format
296
+ conditions_and_quality_json: Dict[
297
+ str, Union[List[str], Dict[str, str]]
298
+ ] = {}
299
+ conditions_and_quality_json["conditions"] = conditions_dict
300
+ conditions_and_quality_json["labels"] = labels
301
+ conditions_and_quality_json["evaluate_quality"] = quality
302
+
303
+ return conditions_and_quality_json
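An illustrative call to `deduce_conditions_and_quality` (not part of the committed file; assumes the camel package is installed and a model backend is available):

from camel.agents.deductive_reasoner_agent import DeductiveReasonerAgent

reasoner = DeductiveReasonerAgent()
result = reasoner.deduce_conditions_and_quality(
    starting_state="A research paper in PDF form",
    target_state="A one-page conference poster",
)
# The returned dictionary mirrors the parsing above: deduced conditions,
# extracted labels, and the quality assessment under "evaluate_quality".
print(result["conditions"])
print(result["labels"])
print(result["evaluate_quality"])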
Paper2Poster/camel/agents/embodied_agent.py ADDED
@@ -0,0 +1,201 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from typing import Any, List, Optional
15
+
16
+ from colorama import Fore
17
+
18
+ from camel.agents.chat_agent import ChatAgent
19
+ from camel.agents.tool_agents.base import BaseToolAgent
20
+ from camel.interpreters import (
21
+ BaseInterpreter,
22
+ InternalPythonInterpreter,
23
+ SubprocessInterpreter,
24
+ )
25
+ from camel.messages import BaseMessage
26
+ from camel.models import BaseModelBackend
27
+ from camel.responses import ChatAgentResponse
28
+ from camel.utils import print_text_animated
29
+
30
+ # AgentOps decorator setting
31
+ try:
32
+ import os
33
+
34
+ if os.getenv("AGENTOPS_API_KEY") is not None:
35
+ from agentops import track_agent
36
+ else:
37
+ raise ImportError
38
+ except (ImportError, AttributeError):
39
+ from camel.utils import track_agent
40
+
41
+
42
+ @track_agent(name="EmbodiedAgent")
43
+ class EmbodiedAgent(ChatAgent):
44
+ r"""Class for managing conversations of CAMEL Embodied Agents.
45
+
46
+ Args:
47
+ system_message (BaseMessage): The system message for the chat agent.
48
+ model (BaseModelBackend, optional): The model backend to use for
49
+ generating responses. (default: :obj:`OpenAIModel` with
50
+ `GPT_4O_MINI`)
51
+ message_window_size (int, optional): The maximum number of previous
52
+ messages to include in the context window. If `None`, no windowing
53
+ is performed. (default: :obj:`None`)
54
+ tool_agents (List[BaseToolAgent], optional): The tool agents to use in
55
+ the embodied agent. (default: :obj:`None`)
56
+ code_interpreter (BaseInterpreter, optional): The code interpreter to
57
+ execute codes. If `code_interpreter` and `tool_agent` are both
58
+ `None`, default to `SubProcessInterpreter`. If `code_interpreter`
59
+ is `None` and `tool_agents` is not `None`, default to
60
+ `InternalPythonInterpreter`. (default: :obj:`None`)
61
+ verbose (bool, optional): Whether to print the agent's messages.
62
+ logger_color (Any): The color of the logger displayed to the user.
63
+ (default: :obj:`Fore.MAGENTA`)
64
+ """
65
+
66
+ def __init__(
67
+ self,
68
+ system_message: BaseMessage,
69
+ model: Optional[BaseModelBackend] = None,
70
+ message_window_size: Optional[int] = None,
71
+ tool_agents: Optional[List[BaseToolAgent]] = None,
72
+ code_interpreter: Optional[BaseInterpreter] = None,
73
+ verbose: bool = False,
74
+ logger_color: Any = Fore.MAGENTA,
75
+ ) -> None:
76
+ self.tool_agents = tool_agents
77
+ self.code_interpreter: BaseInterpreter
78
+ if code_interpreter is not None:
79
+ self.code_interpreter = code_interpreter
80
+ elif self.tool_agents:
81
+ self.code_interpreter = InternalPythonInterpreter()
82
+ else:
83
+ self.code_interpreter = SubprocessInterpreter()
84
+
85
+ if self.tool_agents:
86
+ system_message = self._set_tool_agents(system_message)
87
+ self.verbose = verbose
88
+ self.logger_color = logger_color
89
+ super().__init__(
90
+ system_message=system_message,
91
+ model=model,
92
+ message_window_size=message_window_size,
93
+ )
94
+
95
+ def _set_tool_agents(self, system_message: BaseMessage) -> BaseMessage:
96
+ action_space_prompt = self._get_tool_agents_prompt()
97
+ result_message = system_message.create_new_instance(
98
+ content=system_message.content.format(
99
+ action_space=action_space_prompt
100
+ )
101
+ )
102
+ if self.tool_agents is not None:
103
+ self.code_interpreter.update_action_space(
104
+ {tool.name: tool for tool in self.tool_agents}
105
+ )
106
+ return result_message
107
+
108
+ def _get_tool_agents_prompt(self) -> str:
109
+ r"""Returns the action space prompt.
110
+
111
+ Returns:
112
+ str: The action space prompt.
113
+ """
114
+ if self.tool_agents is not None:
115
+ return "\n".join(
116
+ [
117
+ f"*** {tool.name} ***:\n {tool.description}"
118
+ for tool in self.tool_agents
119
+ ]
120
+ )
121
+ else:
122
+ return ""
123
+
124
+ def get_tool_agent_names(self) -> List[str]:
125
+ r"""Returns the names of tool agents.
126
+
127
+ Returns:
128
+ List[str]: The names of tool agents.
129
+ """
130
+ if self.tool_agents is not None:
131
+ return [tool.name for tool in self.tool_agents]
132
+ else:
133
+ return []
134
+
135
+ # ruff: noqa: E501
136
+ def step(self, input_message: BaseMessage) -> ChatAgentResponse: # type: ignore[override]
137
+ r"""Performs a step in the conversation.
138
+
139
+ Args:
140
+ input_message (BaseMessage): The input message.
141
+
142
+ Returns:
143
+ ChatAgentResponse: A struct containing the output messages,
144
+ a boolean indicating whether the chat session has terminated,
145
+ and information about the chat session.
146
+ """
147
+ response = super().step(input_message)
148
+
149
+ if response.msgs is None or len(response.msgs) == 0:
150
+ raise RuntimeError("Got None output messages.")
151
+ if response.terminated:
152
+ raise RuntimeError(f"{self.__class__.__name__} step failed.")
153
+
154
+ # NOTE: Only single output messages are supported
155
+ explanations, codes = response.msg.extract_text_and_code_prompts()
156
+
157
+ if self.verbose:
158
+ for explanation, code in zip(explanations, codes):
159
+ print_text_animated(
160
+ self.logger_color + f"> Explanation:\n{explanation}"
161
+ )
162
+ print_text_animated(self.logger_color + f"> Code:\n{code}")
163
+
164
+ if len(explanations) > len(codes):
165
+ print_text_animated(
166
+ self.logger_color + f"> Explanation:\n{explanations[-1]}"
167
+ )
168
+
169
+ content = response.msg.content
170
+
171
+ if codes is not None:
172
+ try:
173
+ content = "\n> Executed Results:\n"
174
+ for block_idx, code in enumerate(codes):
175
+ executed_output = self.code_interpreter.run(
176
+ code, code.code_type
177
+ )
178
+ content += (
179
+ f"Executing code block {block_idx}: {{\n"
180
+ + executed_output
181
+ + "}\n"
182
+ )
183
+ except InterruptedError as e:
184
+ content = (
185
+ f"\n> Running code fail: {e}\n"
186
+ "Please regenerate the code."
187
+ )
188
+
189
+ # TODO: Handle errors
190
+ content = input_message.content + f"\n> Embodied Actions:\n{content}"
191
+ message = BaseMessage(
192
+ input_message.role_name,
193
+ input_message.role_type,
194
+ input_message.meta_dict,
195
+ content,
196
+ )
197
+ return ChatAgentResponse(
198
+ msgs=[message],
199
+ terminated=response.terminated,
200
+ info=response.info,
201
+ )
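An illustrative `EmbodiedAgent` step without tool agents, so generated code blocks run through the subprocess interpreter (not part of the committed file; assumes the camel package is installed and a model backend is configured):

from camel.agents.embodied_agent import EmbodiedAgent
from camel.messages import BaseMessage
from camel.types import RoleType

sys_msg = BaseMessage(
    role_name="Programmer",
    role_type=RoleType.ASSISTANT,
    meta_dict=None,
    content="You write and run small Python snippets to answer questions.",
)
agent = EmbodiedAgent(system_message=sys_msg, verbose=True)

user_msg = BaseMessage(
    role_name="User",
    role_type=RoleType.USER,
    meta_dict=None,
    content="Compute the sum of the first 10 squares.",
)
# The returned content appends the executed results of any code blocks.
response = agent.step(user_msg)
print(response.msg.content)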
Paper2Poster/camel/agents/knowledge_graph_agent.py ADDED
@@ -0,0 +1,259 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from typing import TYPE_CHECKING, Optional, Union
15
+
16
+ if TYPE_CHECKING:
17
+ from unstructured.documents.elements import Element
18
+
19
+ from camel.agents import ChatAgent
20
+ from camel.messages import BaseMessage
21
+ from camel.models import BaseModelBackend
22
+ from camel.prompts import TextPrompt
23
+ from camel.storages.graph_storages.graph_element import (
24
+ GraphElement,
25
+ Node,
26
+ Relationship,
27
+ )
28
+ from camel.types import RoleType
29
+
30
+ # AgentOps decorator setting
31
+ try:
32
+ import os
33
+
34
+ if os.getenv("AGENTOPS_API_KEY") is not None:
35
+ from agentops import track_agent
36
+ else:
37
+ raise ImportError
38
+ except (ImportError, AttributeError):
39
+ from camel.utils import track_agent
40
+
41
+
42
+ text_prompt = """
43
+ You are tasked with extracting nodes and relationships from given content and
44
+ structuring them into Node and Relationship objects. Here's the outline of what
45
+ you need to do:
46
+
47
+ Content Extraction:
48
+ You should be able to process input content and identify entities mentioned
49
+ within it.
50
+ Entities can be any noun phrases or concepts that represent distinct entities
51
+ in the context of the given content.
52
+
53
+ Node Extraction:
54
+ For each identified entity, you should create a Node object.
55
+ Each Node object should have a unique identifier (id) and a type (type).
56
+ Additional properties associated with the node can also be extracted and
57
+ stored.
58
+
59
+ Relationship Extraction:
60
+ You should identify relationships between entities mentioned in the content.
61
+ For each relationship, create a Relationship object.
62
+ A Relationship object should have a subject (subj) and an object (obj) which
63
+ are Node objects representing the entities involved in the relationship.
64
+ Each relationship should also have a type (type), and additional properties if
65
+ applicable.
66
+
67
+ Output Formatting:
68
+ The extracted nodes and relationships should be formatted as instances of the
69
+ provided Node and Relationship classes.
70
+ Ensure that the extracted data adheres to the structure defined by the classes.
71
+ Output the structured data in a format that can be easily validated against
72
+ the provided code.
73
+
74
+ Instructions for you:
75
+ Read the provided content thoroughly.
76
+ Identify distinct entities mentioned in the content and categorize them as
77
+ nodes.
78
+ Determine relationships between these entities and represent them as directed
79
+ relationships.
80
+ Provide the extracted nodes and relationships in the specified format below.
81
+ Example for you:
82
+
83
+ Example Content:
84
+ "John works at XYZ Corporation. He is a software engineer. The company is
85
+ located in New York City."
86
+
87
+ Expected Output:
88
+
89
+ Nodes:
90
+
91
+ Node(id='John', type='Person')
92
+ Node(id='XYZ Corporation', type='Organization')
93
+ Node(id='New York City', type='Location')
94
+
95
+ Relationships:
96
+
97
+ Relationship(subj=Node(id='John', type='Person'), obj=Node(id='XYZ
98
+ Corporation', type='Organization'), type='WorksAt')
99
+ Relationship(subj=Node(id='John', type='Person'), obj=Node(id='New York City',
100
+ type='Location'), type='ResidesIn')
101
+
102
+ ===== TASK =====
103
+ Please extract nodes and relationships from the given content and structure them
104
+ into Node and Relationship objects.
105
+
106
+ {task}
107
+ """
108
+
109
+
110
+ @track_agent(name="KnowledgeGraphAgent")
111
+ class KnowledgeGraphAgent(ChatAgent):
112
+ r"""An agent that can extract node and relationship information for
113
+ different entities from given `Element` content.
114
+
115
+ Attributes:
116
+ task_prompt (TextPrompt): A prompt for the agent to extract node and
117
+ relationship information for different entities.
118
+ """
119
+
120
+ def __init__(
121
+ self,
122
+ model: Optional[BaseModelBackend] = None,
123
+ ) -> None:
124
+ r"""Initialize the `KnowledgeGraphAgent`.
125
+
126
+ Args:
127
+ model (BaseModelBackend, optional): The model backend to use for
128
+ generating responses. (default: :obj:`OpenAIModel` with
129
+ `GPT_4O_MINI`)
130
+ """
131
+ system_message = BaseMessage(
132
+ role_name="Graphify",
133
+ role_type=RoleType.ASSISTANT,
134
+ meta_dict=None,
135
+ content="Your mission is to transform unstructured content "
136
+ "into structured graph data. Extract nodes and relationships with "
137
+ "precision, and let the connections unfold. Your graphs will "
138
+ "illuminate the hidden connections within the chaos of "
139
+ "information.",
140
+ )
141
+ super().__init__(system_message, model=model)
142
+
143
+ def run(
144
+ self,
145
+ element: "Element",
146
+ parse_graph_elements: bool = False,
147
+ ) -> Union[str, GraphElement]:
148
+ r"""Run the agent to extract node and relationship information.
149
+
150
+ Args:
151
+ element (Element): The input element.
152
+ parse_graph_elements (bool, optional): Whether to parse into
153
+ `GraphElement`. Defaults to `False`.
154
+
155
+ Returns:
156
+ Union[str, GraphElement]: The extracted node and relationship
157
+ information. If `parse_graph_elements` is `True` then return
158
+ `GraphElement`, else return `str`.
159
+ """
160
+ self.reset()
161
+ self.element = element
162
+
163
+ knowledge_graph_prompt = TextPrompt(text_prompt)
164
+ knowledge_graph_generation = knowledge_graph_prompt.format(
165
+ task=str(element)
166
+ )
167
+
168
+ knowledge_graph_generation_msg = BaseMessage.make_user_message(
169
+ role_name="Graphify", content=knowledge_graph_generation
170
+ )
171
+
172
+ response = self.step(input_message=knowledge_graph_generation_msg)
173
+
174
+ content = response.msg.content
175
+
176
+ if parse_graph_elements:
177
+ content = self._parse_graph_elements(content)
178
+
179
+ return content
180
+
181
+ def _validate_node(self, node: Node) -> bool:
182
+ r"""Validate if the object is a valid Node.
183
+
184
+ Args:
185
+ node (Node): Object to be validated.
186
+
187
+ Returns:
188
+ bool: True if the object is a valid Node, False otherwise.
189
+ """
190
+ return (
191
+ isinstance(node, Node)
192
+ and isinstance(node.id, (str, int))
193
+ and isinstance(node.type, str)
194
+ )
195
+
196
+ def _validate_relationship(self, relationship: Relationship) -> bool:
197
+ r"""Validate if the object is a valid Relationship.
198
+
199
+ Args:
200
+ relationship (Relationship): Object to be validated.
201
+
202
+ Returns:
203
+ bool: True if the object is a valid Relationship, False otherwise.
204
+ """
205
+ return (
206
+ isinstance(relationship, Relationship)
207
+ and self._validate_node(relationship.subj)
208
+ and self._validate_node(relationship.obj)
209
+ and isinstance(relationship.type, str)
210
+ )
211
+
212
+ def _parse_graph_elements(self, input_string: str) -> GraphElement:
213
+ r"""Parses graph elements from given content.
214
+
215
+ Args:
216
+ input_string (str): The input content.
217
+
218
+ Returns:
219
+ GraphElement: The parsed graph elements.
220
+ """
221
+ import re
222
+
223
+ # Regular expressions to extract nodes and relationships
224
+ node_pattern = r"Node\(id='(.*?)', type='(.*?)'\)"
225
+ rel_pattern = (
226
+ r"Relationship\(subj=Node\(id='(.*?)', type='(.*?)'\), "
227
+ r"obj=Node\(id='(.*?)', type='(.*?)'\), type='(.*?)'\)"
228
+ )
229
+
230
+ nodes = {}
231
+ relationships = []
232
+
233
+ # Extract nodes
234
+ for match in re.finditer(node_pattern, input_string):
235
+ id, type = match.groups()
236
+ properties = {'source': 'agent_created'}
237
+ if id not in nodes:
238
+ node = Node(id=id, type=type, properties=properties)
239
+ if self._validate_node(node):
240
+ nodes[id] = node
241
+
242
+ # Extract relationships
243
+ for match in re.finditer(rel_pattern, input_string):
244
+ subj_id, subj_type, obj_id, obj_type, rel_type = match.groups()
245
+ properties = {'source': 'agent_created'}
246
+ if subj_id in nodes and obj_id in nodes:
247
+ subj = nodes[subj_id]
248
+ obj = nodes[obj_id]
249
+ relationship = Relationship(
250
+ subj=subj, obj=obj, type=rel_type, properties=properties
251
+ )
252
+ if self._validate_relationship(relationship):
253
+ relationships.append(relationship)
254
+
255
+ return GraphElement(
256
+ nodes=list(nodes.values()),
257
+ relationships=relationships,
258
+ source=self.element,
259
+ )
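A standalone sketch of the regular expressions used by `_parse_graph_elements`, applied to the example output format from the prompt (runnable as-is; it exercises only the parsing, not the model):

import re

sample = (
    "Node(id='John', type='Person')\n"
    "Node(id='XYZ Corporation', type='Organization')\n"
    "Relationship(subj=Node(id='John', type='Person'), "
    "obj=Node(id='XYZ Corporation', type='Organization'), type='WorksAt')"
)

node_pattern = r"Node\(id='(.*?)', type='(.*?)'\)"
rel_pattern = (
    r"Relationship\(subj=Node\(id='(.*?)', type='(.*?)'\), "
    r"obj=Node\(id='(.*?)', type='(.*?)'\), type='(.*?)'\)"
)

# The node pattern also matches the Node(...) literals nested inside the
# Relationship line, which is why _parse_graph_elements deduplicates by id.
print(re.findall(node_pattern, sample))
print(re.findall(rel_pattern, sample))
# -> [('John', 'Person', 'XYZ Corporation', 'Organization', 'WorksAt')]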
Paper2Poster/camel/agents/multi_hop_generator_agent.py ADDED
@@ -0,0 +1,117 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import textwrap
16
+ from typing import Any
17
+
18
+ from pydantic import ConfigDict
19
+
20
+ from camel.agents.programmed_agent_instruction import (
21
+ ProgrammableChatAgent,
22
+ ProgrammedAgentInstructionResult,
23
+ programmable_capability,
24
+ )
25
+ from camel.datagen.source2synth.models import (
26
+ ContextPrompt,
27
+ MultiHopQA,
28
+ )
29
+ from camel.messages import BaseMessage
30
+
31
+
32
+ class MultiHopGeneratorAgent(ProgrammableChatAgent):
33
+ r"""An agent specialized in generating multi-hop question-answer pairs.
34
+
35
+ This agent is designed to create complex questions that require multiple
36
+ steps of reasoning to answer. It analyzes context to identify related
37
+ facts and generates questions that require connecting these facts
38
+ logically.
39
+
40
+ Attributes:
41
+ model_config (ConfigDict): Configuration for model behavior.
42
+ system_message (BaseMessage): System message defining agent's role and
43
+ instructions.
44
+ """
45
+
46
+ model_config = ConfigDict(arbitrary_types_allowed=True)
47
+
48
+ def __init__(self, **kwargs: Any) -> None:
49
+ r"""Initialize the MultiHopGeneratorAgent.
50
+
51
+ Args:
52
+ **kwargs (Any): Additional keyword arguments to pass to parent
53
+ class.
54
+ """
55
+ super().__init__(**kwargs)
56
+
57
+ system_text: str = textwrap.dedent(
58
+ """\
59
+ You are an expert at generating
60
+ multi-hop question-answer pairs.
61
+ For each context, you should:
62
+ 1. Identify multiple related facts or pieces of information
63
+ 2. Create questions that require reasoning across these multiple pieces
64
+ 3. Ensure the reasoning chain is clear and logical
65
+ 4. Generate questions that require at least 2-3 steps of reasoning
66
+ 5. Include the reasoning steps in the answer
67
+
68
+ Give your response with this information:
69
+ Question: [Complex question requiring multiple reasoning steps]
70
+ Reasoning Steps:
71
+ 1. [First reasoning step]
72
+ 2. [Second reasoning step]
73
+ 3. [Final reasoning step]
74
+ Answer: [Final answer]
75
+ Supporting Facts: [List of relevant text segments used]
76
+ """ # noqa: E501
77
+ )
78
+ self.system_message = BaseMessage.make_assistant_message(
79
+ role_name='Assistant', content=system_text
80
+ )
81
+
82
+ @programmable_capability
83
+ def generate_multi_hop_qa(
84
+ self, context: str
85
+ ) -> ProgrammedAgentInstructionResult[MultiHopQA]:
86
+ r"""Generate a multi-hop question-answer pair from given context.
87
+
88
+ Args:
89
+ context (str): The input text context to generate QA from.
90
+
91
+ Returns:
92
+ ProgrammedAgentInstructionResult[MultiHopQA]: Result containing the
93
+ generated question, reasoning steps, answer, and supporting
94
+ facts.
95
+
96
+ Raises:
97
+ RuntimeError: If the agent fails to generate a response.
98
+ """
99
+ context_prompt = ContextPrompt(
100
+ main_context=context, related_contexts=None
101
+ )
102
+
103
+ user_message = BaseMessage.make_user_message(
104
+ content=context_prompt.model_dump_json(), role_name="User"
105
+ )
106
+ response = self.step(
107
+ input_message=user_message, response_format=MultiHopQA
108
+ )
109
+ if not response.msgs:
110
+ raise RuntimeError("No response from agent")
111
+
112
+ value = MultiHopQA.model_validate_json(response.msgs[0].content)
113
+ return ProgrammedAgentInstructionResult(
114
+ user_message=user_message,
115
+ agent_message=response.msgs[0],
116
+ value=value,
117
+ )
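An illustrative call to `generate_multi_hop_qa` (not part of the committed file; assumes the camel package is installed and a backend that supports structured `response_format` output is configured):

from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent

agent = MultiHopGeneratorAgent()
context = (
    "Ada Lovelace worked with Charles Babbage on the Analytical Engine. "
    "The Analytical Engine was designed in the 1830s in London."
)
result = agent.generate_multi_hop_qa(context)
# result.value is a MultiHopQA with the question, reasoning steps,
# answer, and supporting facts.
print(result.value)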
Paper2Poster/camel/agents/programmed_agent_instruction.py ADDED
@@ -0,0 +1,203 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import abc
15
+ import threading
16
+ from enum import Enum
17
+ from functools import wraps
18
+ from typing import Any, Callable, Generic, Optional, TypeVar
19
+
20
+ from pydantic import BaseModel, ConfigDict
21
+
22
+ from camel.agents import ChatAgent
23
+ from camel.messages import BaseMessage
24
+
25
+ T = TypeVar('T')
26
+
27
+
28
+ class ProgrammableAgentRequirement(Enum):
29
+ r"""Requirements for programmable agent state.
30
+
31
+ Defines the possible requirements that can be used to repair the state
32
+ of a programmable agent.
33
+
34
+ Attributes:
35
+ LAST_MESSAGE_NOT_USER (str): Requires that the last message in the
36
+ conversation was not from the user.
37
+ """
38
+
39
+ LAST_MESSAGE_NOT_USER = "LAST_MESSAGE_NOT_USER"
40
+
41
+
42
+ class ProgrammedAgentInstructionResult(BaseModel, Generic[T]):
43
+ r"""Result of a programmable agent instruction execution.
44
+
45
+ Contains the messages exchanged during execution and the computed value.
46
+ The value type is specified by the generic type parameter T.
47
+
48
+ Attributes:
49
+ user_message (BaseMessage): The message sent by the user.
50
+ agent_message (BaseMessage): The message sent by the agent.
51
+ value (T): The computed result value of type T.
52
+ """
53
+
54
+ user_message: BaseMessage
55
+ agent_message: BaseMessage
56
+ value: T
57
+
58
+ model_config = ConfigDict(arbitrary_types_allowed=True)
59
+
60
+
61
+ class AbstractProgrammableAgent(abc.ABC):
62
+ r"""Abstract class for a programmable agent.
63
+
64
+ A programmable agent is an agent that can be programmed to perform a
65
+ specific function or task. This class defines the interface for a
66
+ programmable agent.
67
+
68
+ These methods should be implemented in order to ensure the agent supports
69
+ the necessary guarantees to enable a programming interface while
70
+ maintaining compatibility in a multi-agent system.
71
+
72
+ A programmable agent is responsible for providing and maintaining a
73
+ programming interface for its functionality.
74
+ """
75
+
76
+ @abc.abstractmethod
77
+ def run_atomic(
78
+ self, callback: Callable[[], ProgrammedAgentInstructionResult[T]]
79
+ ) -> ProgrammedAgentInstructionResult[T]:
80
+ r"""Run an atomic operation on the agent.
81
+
82
+ An atomic operation is an operation that is guaranteed to
83
+ be executed without interruption by any other operation.
84
+
85
+ Args:
86
+ callback (Callable[[], ProgrammedAgentInstructionResult[T]]): The
87
+ operation to execute atomically.
88
+
89
+ Returns:
90
+ ProgrammedAgentInstructionResult[T]: The result of the operation.
91
+
92
+ Raises:
93
+ RuntimeError: If an operation is already in progress.
94
+ """
95
+ raise NotImplementedError
96
+
97
+ @abc.abstractmethod
98
+ def repair_state(self, requirement: ProgrammableAgentRequirement) -> None:
99
+ r"""Repair the state of the agent.
100
+
101
+ Agents may have other non-atomic interfaces, such as a user interface,
102
+ or chat between other agents. This method should restore the agent to
103
+ a state where it can perform operations according to the specified
104
+ requirement.
105
+
106
+ Args:
107
+ requirement (ProgrammableAgentRequirement): The requirement to
108
+ repair the state for.
109
+ """
110
+ raise NotImplementedError
111
+
112
+
113
+ def programmable_capability(
114
+ func: Callable[..., ProgrammedAgentInstructionResult[T]],
115
+ ) -> Callable[..., ProgrammedAgentInstructionResult[T]]:
116
+ r"""Decorator for programmable agent capabilities.
117
+
118
+ This decorator ensures that the decorated method is executed atomically
119
+ and maintains the agent's state guarantees.
120
+
121
+ Args:
122
+ func (Callable[..., ProgrammedAgentInstructionResult[T]]): The method
123
+ to decorate.
124
+
125
+ Returns:
126
+ Callable[..., ProgrammedAgentInstructionResult[T]]: The decorated
127
+ method that ensures atomic execution.
128
+ """
129
+
130
+ @wraps(func)
131
+ def wrapper(
132
+ self, *args: Any, **kwargs: Any
133
+ ) -> ProgrammedAgentInstructionResult[T]:
134
+ return self.run_atomic(lambda: func(self, *args, **kwargs))
135
+
136
+ return wrapper
137
+
138
+
139
+ class ProgrammableChatAgent(ChatAgent, AbstractProgrammableAgent):
140
+ r"""A chat agent that can be programmed to perform specific tasks.
141
+
142
+ Provides a default implementation of atomic execution using threading locks
143
+ and basic state tracking for message roles. Implementing classes need to
144
+ provide specific repair logic for their use cases.
145
+
146
+ Attributes:
147
+ _operation_lock (threading.Lock): Lock for ensuring atomic operations.
148
+ _last_message_role (Optional[str]): Role of the last message in the
149
+ conversation.
150
+ """
151
+
152
+ def __init__(self, **kwargs: Any) -> None:
153
+ r"""Initialize the ProgrammableChatAgent.
154
+
155
+ Args:
156
+ **kwargs (Any): Additional keyword arguments to pass to parent
157
+ class.
158
+ """
159
+ super().__init__(**kwargs)
160
+ self._operation_lock = threading.Lock()
161
+ self._last_message_role: Optional[str] = None
162
+
163
+ def run_atomic(
164
+ self, callback: Callable[[], ProgrammedAgentInstructionResult[T]]
165
+ ) -> ProgrammedAgentInstructionResult[T]:
166
+ r"""Run an atomic operation on the agent.
167
+
168
+ Ensures thread-safe execution of the callback function by using a lock.
169
+
170
+ Args:
171
+ callback (Callable[[], ProgrammedAgentInstructionResult[T]]): The
172
+ operation to execute atomically.
173
+
174
+ Returns:
175
+ ProgrammedAgentInstructionResult[T]: The result of the operation.
176
+
177
+ Raises:
178
+ RuntimeError: If an operation is already in progress.
179
+ """
180
+ if not self._operation_lock.acquire(blocking=False):
181
+ raise RuntimeError("Operation already in progress")
182
+
183
+ try:
184
+ result = callback()
185
+ self._last_message_role = result.agent_message.role_name
186
+ return result
187
+ finally:
188
+ self._operation_lock.release()
189
+
190
+ def repair_state(self, requirement: ProgrammableAgentRequirement) -> None:
191
+ r"""Repair the state of the agent.
192
+
193
+ Implements basic state repair for message role requirements.
194
+
195
+ Args:
196
+ requirement (ProgrammableAgentRequirement): The requirement to
197
+ repair the state for.
198
+ """
199
+ if requirement == ProgrammableAgentRequirement.LAST_MESSAGE_NOT_USER:
200
+ if self._last_message_role == "user":
201
+ raise NotImplementedError(
202
+ "Must implement repair for LAST_MESSAGE_NOT_USER"
203
+ )
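An illustrative subclass showing how `programmable_capability` wraps a method in `run_atomic` (not part of the committed file; `EchoAgent` and its `echo` capability are hypothetical names, and the camel package is assumed to be installed):

from camel.agents.programmed_agent_instruction import (
    ProgrammableChatAgent,
    ProgrammedAgentInstructionResult,
    programmable_capability,
)
from camel.messages import BaseMessage

class EchoAgent(ProgrammableChatAgent):
    @programmable_capability
    def echo(self, text: str) -> ProgrammedAgentInstructionResult[str]:
        # Runs atomically: concurrent calls on the same agent raise
        # RuntimeError instead of interleaving their messages.
        user_message = BaseMessage.make_user_message(
            role_name="User", content=text
        )
        response = self.step(input_message=user_message)
        return ProgrammedAgentInstructionResult(
            user_message=user_message,
            agent_message=response.msgs[0],
            value=response.msgs[0].content,
        )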
Paper2Poster/camel/agents/role_assignment_agent.py ADDED
@@ -0,0 +1,141 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import re
15
+ from typing import Dict, Optional, Union
16
+
17
+ from camel.agents.chat_agent import ChatAgent
18
+ from camel.messages import BaseMessage
19
+ from camel.models import BaseModelBackend
20
+ from camel.prompts import TextPrompt
21
+ from camel.types import RoleType
22
+
23
+ # AgentOps decorator setting
24
+ try:
25
+ import os
26
+
27
+ if os.getenv("AGENTOPS_API_KEY") is not None:
28
+ from agentops import track_agent
29
+ else:
30
+ raise ImportError
31
+ except (ImportError, AttributeError):
32
+ from camel.utils import track_agent
33
+
34
+
35
+ @track_agent(name="RoleAssignmentAgent")
36
+ class RoleAssignmentAgent(ChatAgent):
37
+ r"""An agent that generates role names based on the task prompt.
38
+
39
+ Args:
40
+ model (BaseModelBackend, optional): The model backend to use for
41
+ generating responses. (default: :obj:`OpenAIModel` with
42
+ `GPT_4O_MINI`)
43
+
44
+ Attributes:
45
+ role_assignment_prompt (TextPrompt): A prompt for the agent to generate
46
+ role names.
47
+ """
48
+
49
+ def __init__(
50
+ self,
51
+ model: Optional[BaseModelBackend] = None,
52
+ ) -> None:
53
+ system_message = BaseMessage(
54
+ role_name="Role Assigner",
55
+ role_type=RoleType.ASSISTANT,
56
+ meta_dict=None,
57
+ content="You assign roles based on tasks.",
58
+ )
59
+ super().__init__(system_message, model=model)
60
+
61
+ def run(
62
+ self,
63
+ task_prompt: Union[str, TextPrompt],
64
+ num_roles: int = 2,
65
+ ) -> Dict[str, str]:
66
+ r"""Generate role names based on the input task prompt.
67
+
68
+ Args:
69
+ task_prompt (Union[str, TextPrompt]): The prompt
70
+ for the task based on which the roles are to be generated.
71
+ num_roles (int, optional): The number of roles to generate.
72
+ (default: :obj:`2`)
73
+
74
+ Returns:
75
+ Dict[str, str]: A dictionary mapping role names to their
76
+ descriptions.
77
+ """
78
+ self.reset()
79
+
80
+ expert_prompt = "===== ANSWER PROMPT =====\n" + "\n".join(
81
+ f"Domain expert {i + 1}: <BLANK>\n"
82
+ f"Associated competencies, characteristics, duties "
83
+ f"and workflows: <BLANK>. End."
84
+ for i in range(num_roles or 0)
85
+ )
86
+ role_assignment_generation_prompt = TextPrompt(
87
+ "You are a role assignment agent, and you're in charge of "
88
+ + "recruiting {num_roles} experts for the following task."
89
+ + "\n==== TASK =====\n {task}\n\n"
90
+ + "Identify the domain experts you'd recruit and detail their "
91
+ + "associated competencies, characteristics, duties and workflows "
92
+ + "to complete the task.\n "
93
+ + "Your answer MUST adhere to the format of ANSWER PROMPT, and "
94
+ + "ONLY answer the BLANKs.\n"
95
+ + expert_prompt
96
+ )
97
+ role_assignment_generation = role_assignment_generation_prompt.format(
98
+ num_roles=num_roles, task=task_prompt
99
+ )
100
+
101
+ role_assignment_generation_msg = BaseMessage.make_user_message(
102
+ role_name="Role Assigner", content=role_assignment_generation
103
+ )
104
+
105
+ response = self.step(input_message=role_assignment_generation_msg)
106
+
107
+ msg = response.msg # type: BaseMessage
108
+ terminated = response.terminated
109
+
110
+ # Distribute the output completions into role names and descriptions
111
+ role_names = [
112
+ desc.replace("<|", "").replace("|>", "")
113
+ for desc in re.findall(
114
+ r"Domain expert \d: (.+?)\nAssociated competencies,",
115
+ msg.content,
116
+ re.DOTALL,
117
+ )
118
+ ]
119
+ role_descriptions = [
120
+ desc.replace("<|", "").replace("|>", "")
121
+ for desc in re.findall(
122
+ r"Associated competencies, characteristics, "
123
+ r"duties and workflows: (.+?) End.",
124
+ msg.content,
125
+ re.DOTALL,
126
+ )
127
+ ]
128
+
129
+ if len(role_names) != num_roles or len(role_descriptions) != num_roles:
130
+ raise RuntimeError(
131
+ "Got None or insufficient information of roles."
132
+ )
133
+ if terminated:
134
+ raise RuntimeError("Role assignment failed.")
135
+
136
+ role_descriptions_dict = {
137
+ role_name: description
138
+ for role_name, description in zip(role_names, role_descriptions)
139
+ }
140
+
141
+ return role_descriptions_dict
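A minimal usage sketch of `RoleAssignmentAgent` (illustrative only, not part of the added file); it assumes a default model backend can be constructed from the environment, e.g. via an OpenAI API key:

```python
# Illustrative sketch, not part of role_assignment_agent.py: run the agent on a
# made-up task. Assumes a default model backend (e.g. an OpenAI key) is configured.
from camel.agents.role_assignment_agent import RoleAssignmentAgent

agent = RoleAssignmentAgent()
roles = agent.run(
    task_prompt="Turn a research paper into a conference poster.",
    num_roles=2,
)
for role_name, description in roles.items():
    print(role_name, "->", description)
```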
Paper2Poster/camel/agents/search_agent.py ADDED
@@ -0,0 +1,133 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from typing import Optional
15
+
16
+ from camel.agents.chat_agent import ChatAgent
17
+ from camel.messages import BaseMessage
18
+ from camel.models import BaseModelBackend
19
+ from camel.prompts import TextPrompt
20
+ from camel.types import RoleType
21
+ from camel.utils import create_chunks
22
+
23
+ # AgentOps decorator setting
24
+ try:
25
+ import os
26
+
27
+ if os.getenv("AGENTOPS_API_KEY") is not None:
28
+ from agentops import track_agent
29
+ else:
30
+ raise ImportError
31
+ except (ImportError, AttributeError):
32
+ from camel.utils import track_agent
33
+
34
+
35
+ @track_agent(name="SearchAgent")
36
+ class SearchAgent(ChatAgent):
37
+ r"""An agent that summarizes text based on a query and evaluates the
38
+ relevance of an answer.
39
+
40
+ Args:
41
+ model (BaseModelBackend, optional): The model backend to use for
42
+ generating responses. (default: :obj:`OpenAIModel` with
43
+ `GPT_4O_MINI`)
44
+ """
45
+
46
+ def __init__(
47
+ self,
48
+ model: Optional[BaseModelBackend] = None,
49
+ ) -> None:
50
+ system_message = BaseMessage(
51
+ role_name="Assistant",
52
+ role_type=RoleType.ASSISTANT,
53
+ meta_dict=None,
54
+ content="You are a helpful assistant.",
55
+ )
56
+ super().__init__(system_message, model=model)
57
+
58
+ def summarize_text(self, text: str, query: str) -> str:
59
+ r"""Summarize the information from the text, base on the query.
60
+
61
+ Args:
62
+ text (str): Text to summarize.
63
+ query (str): What information you want.
64
+
65
+ Returns:
66
+ str: Summarized information relevant to the query.
67
+ """
68
+ self.reset()
69
+
70
+ summary_prompt = TextPrompt(
71
+ '''Gather information from this text that is relevant to the
72
+ question, but do not directly answer the question.\nquestion:
73
+ {query}\ntext '''
74
+ )
75
+ summary_prompt = summary_prompt.format(query=query)
76
+ # Max length of each chunk
77
+ max_len = 3000
78
+ results = ""
79
+ chunks = create_chunks(text, max_len)
80
+ # Summarize
81
+ for i, chunk in enumerate(chunks, start=1):
82
+ prompt = summary_prompt + str(i) + ": " + chunk
83
+ user_msg = BaseMessage.make_user_message(
84
+ role_name="User",
85
+ content=prompt,
86
+ )
87
+ result = self.step(user_msg).msg.content
88
+ results += result + "\n"
89
+
90
+ # Final summarization
91
+ final_prompt = TextPrompt(
92
+ '''Here are some summarized texts which were split from one text. Use
93
+ the information to answer the question. If you can't find the answer,
94
+ you must answer "I can not find the answer to the query" and
95
+ explain why.\n Query:\n{query}.\n\nText:\n'''
96
+ )
97
+ final_prompt = final_prompt.format(query=query)
98
+ prompt = final_prompt + results
99
+
100
+ user_msg = BaseMessage.make_user_message(
101
+ role_name="User",
102
+ content=prompt,
103
+ )
104
+ response = self.step(user_msg).msg.content
105
+
106
+ return response
107
+
108
+ def continue_search(self, query: str, answer: str) -> bool:
109
+ r"""Ask whether to continue search or not based on the provided answer.
110
+
111
+ Args:
112
+ query (str): The question.
113
+ answer (str): The answer to the question.
114
+
115
+ Returns:
116
+ bool: `True` if the search should continue, `False`
117
+ otherwise.
118
+ """
119
+ prompt = TextPrompt(
120
+ "Do you think the ANSWER can answer the QUERY? "
121
+ "Use only 'yes' or 'no' to answer.\n"
122
+ "===== QUERY =====\n{query}\n\n"
123
+ "===== ANSWER =====\n{answer}"
124
+ )
125
+ prompt = prompt.format(query=query, answer=answer)
126
+ user_msg = BaseMessage.make_user_message(
127
+ role_name="User",
128
+ content=prompt,
129
+ )
130
+ response = self.step(user_msg).msg.content
131
+ if "yes" in str(response).lower():
132
+ return False
133
+ return True
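A hedged usage sketch of `SearchAgent` (illustrative only, not part of the added file); the text and query below are placeholders and a default model backend is assumed:

```python
# Illustrative sketch, not part of search_agent.py: summarize retrieved text and
# decide whether to keep searching. The text and query are placeholders.
from camel.agents.search_agent import SearchAgent

agent = SearchAgent()
query = "Which datasets does the paper evaluate on?"
answer = agent.summarize_text(text="<long retrieved passage>", query=query)
if agent.continue_search(query=query, answer=answer):
    print("Answer judged insufficient; keep searching.")
else:
    print(answer)
```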
Paper2Poster/camel/agents/task_agent.py ADDED
@@ -0,0 +1,410 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from typing import Any, Dict, List, Optional, Union
15
+
16
+ from camel.agents.chat_agent import ChatAgent
17
+ from camel.messages import BaseMessage
18
+ from camel.models import BaseModelBackend
19
+ from camel.prompts import PromptTemplateGenerator, TextPrompt
20
+ from camel.types import RoleType, TaskType
21
+ from camel.utils import get_task_list
22
+
23
+ # AgentOps decorator setting
24
+ try:
25
+ import os
26
+
27
+ if os.getenv("AGENTOPS_API_KEY") is not None:
28
+ from agentops import track_agent
29
+ else:
30
+ raise ImportError
31
+ except (ImportError, AttributeError):
32
+ from camel.utils import track_agent
33
+
34
+
35
+ @track_agent(name="TaskSpecifyAgent")
36
+ class TaskSpecifyAgent(ChatAgent):
37
+ r"""An agent that specifies a given task prompt by prompting the user to
38
+ provide more details.
39
+
40
+ Attributes:
41
+ DEFAULT_WORD_LIMIT (int): The default word limit for the task prompt.
42
+ task_specify_prompt (TextPrompt): The prompt for specifying the task.
43
+
44
+ Args:
45
+ model (BaseModelBackend, optional): The model backend to use for
46
+ generating responses. (default: :obj:`OpenAIModel` with
47
+ `GPT_4O_MINI`)
48
+ task_type (TaskType, optional): The type of task for which to generate
49
+ a prompt. (default: :obj:`TaskType.AI_SOCIETY`)
50
+ task_specify_prompt (Union[str, TextPrompt], optional): The prompt for
51
+ specifying the task. (default: :obj:`None`)
52
+ word_limit (int, optional): The word limit for the task prompt.
53
+ (default: :obj:`50`)
54
+ output_language (str, optional): The language to be output by the
55
+ agent. (default: :obj:`None`)
56
+ """
57
+
58
+ DEFAULT_WORD_LIMIT = 50
59
+
60
+ def __init__(
61
+ self,
62
+ model: Optional[BaseModelBackend] = None,
63
+ task_type: TaskType = TaskType.AI_SOCIETY,
64
+ task_specify_prompt: Optional[Union[str, TextPrompt]] = None,
65
+ word_limit: int = DEFAULT_WORD_LIMIT,
66
+ output_language: Optional[str] = None,
67
+ ) -> None:
68
+ self.task_specify_prompt: Union[str, TextPrompt]
69
+ if task_specify_prompt is None:
70
+ task_specify_prompt_template = (
71
+ PromptTemplateGenerator().get_task_specify_prompt(task_type)
72
+ )
73
+
74
+ self.task_specify_prompt = task_specify_prompt_template.format(
75
+ word_limit=word_limit
76
+ )
77
+ else:
78
+ self.task_specify_prompt = TextPrompt(task_specify_prompt)
79
+
80
+ system_message = BaseMessage(
81
+ role_name="Task Specifier",
82
+ role_type=RoleType.ASSISTANT,
83
+ meta_dict=None,
84
+ content="You can make a task more specific.",
85
+ )
86
+
87
+ super().__init__(
88
+ system_message,
89
+ model=model,
90
+ output_language=output_language,
91
+ )
92
+
93
+ def run(
94
+ self,
95
+ task_prompt: Union[str, TextPrompt],
96
+ meta_dict: Optional[Dict[str, Any]] = None,
97
+ ) -> TextPrompt:
98
+ r"""Specify the given task prompt by providing more details.
99
+
100
+ Args:
101
+ task_prompt (Union[str, TextPrompt]): The original task
102
+ prompt.
103
+ meta_dict (Dict[str, Any], optional): A dictionary containing
104
+ additional information to include in the prompt.
105
+ (default: :obj:`None`)
106
+
107
+ Returns:
108
+ TextPrompt: The specified task prompt.
109
+ """
110
+ self.reset()
111
+ task_specify_prompt = self.task_specify_prompt.format(task=task_prompt)
112
+
113
+ if meta_dict is not None:
114
+ task_specify_prompt = task_specify_prompt.format(**meta_dict)
115
+ task_msg = BaseMessage.make_user_message(
116
+ role_name="Task Specifier", content=task_specify_prompt
117
+ )
118
+ specifier_response = self.step(task_msg)
119
+
120
+ if specifier_response.terminated:
121
+ raise RuntimeError("Task specification failed.")
122
+ if len(specifier_response.msgs) == 0:
123
+ raise RuntimeError("Got no specification message.")
124
+
125
+ specified_task_msg = specifier_response.msgs[0]
126
+
127
+ return TextPrompt(specified_task_msg.content)
128
+
129
+
130
+ @track_agent(name="TaskPlannerAgent")
131
+ class TaskPlannerAgent(ChatAgent):
132
+ r"""An agent that helps divide a task into subtasks based on the input
133
+ task prompt.
134
+
135
+ Attributes:
136
+ task_planner_prompt (TextPrompt): A prompt for the agent to divide
137
+ the task into subtasks.
138
+
139
+ Args:
140
+ model (BaseModelBackend, optional): The model backend to use for
141
+ generating responses. (default: :obj:`OpenAIModel` with
142
+ `GPT_4O_MINI`)
143
+ output_language (str, optional): The language to be output by the
144
+ agent. (default: :obj:`None`)
145
+ """
146
+
147
+ def __init__(
148
+ self,
149
+ model: Optional[BaseModelBackend] = None,
150
+ output_language: Optional[str] = None,
151
+ ) -> None:
152
+ self.task_planner_prompt = TextPrompt(
153
+ "Divide this task into subtasks: {task}. Be concise."
154
+ )
155
+ system_message = BaseMessage(
156
+ role_name="Task Planner",
157
+ role_type=RoleType.ASSISTANT,
158
+ meta_dict=None,
159
+ content="You are a helpful task planner.",
160
+ )
161
+
162
+ super().__init__(
163
+ system_message,
164
+ model=model,
165
+ output_language=output_language,
166
+ )
167
+
168
+ def run(
169
+ self,
170
+ task_prompt: Union[str, TextPrompt],
171
+ ) -> TextPrompt:
172
+ r"""Generate subtasks based on the input task prompt.
173
+
174
+ Args:
175
+ task_prompt (Union[str, TextPrompt]): The prompt for the task to
176
+ be divided into subtasks.
177
+
178
+ Returns:
179
+ TextPrompt: A prompt for the subtasks generated by the agent.
180
+ """
181
+ # TODO: Maybe include roles information.
182
+ self.reset()
183
+ task_planner_prompt = self.task_planner_prompt.format(task=task_prompt)
184
+
185
+ task_msg = BaseMessage.make_user_message(
186
+ role_name="Task Planner", content=task_planner_prompt
187
+ )
188
+
189
+ task_response = self.step(task_msg)
190
+
191
+ if task_response.terminated:
192
+ raise RuntimeError("Task planning failed.")
193
+ if len(task_response.msgs) == 0:
194
+ raise RuntimeError("Got no task planning message.")
195
+
196
+ sub_tasks_msg = task_response.msgs[0]
197
+ return TextPrompt(sub_tasks_msg.content)
198
+
199
+
200
+ @track_agent(name="TaskCreationAgent")
201
+ class TaskCreationAgent(ChatAgent):
202
+ r"""An agent that helps create new tasks based on the objective
203
+ and last completed task. Compared to :obj:`TaskPlannerAgent`,
204
+ it's still a task planner, but it has more context information
205
+ like last task and incomplete task list. Modified from
206
+ `BabyAGI <https://github.com/yoheinakajima/babyagi>`_.
207
+
208
+ Attributes:
209
+ task_creation_prompt (TextPrompt): A prompt for the agent to
210
+ create new tasks.
211
+
212
+ Args:
213
+ role_name (str): The role name of the Agent to create the task.
214
+ objective (Union[str, TextPrompt]): The objective of the Agent to
215
+ perform the task.
216
+ model (BaseModelBackend, optional): The LLM backend to use for
217
+ generating responses. (default: :obj:`OpenAIModel` with
218
+ `GPT_4O_MINI`)
219
+ output_language (str, optional): The language to be output by the
220
+ agent. (default: :obj:`None`)
221
+ message_window_size (int, optional): The maximum number of previous
222
+ messages to include in the context window. If `None`, no windowing
223
+ is performed. (default: :obj:`None`)
224
+ max_task_num (int, optional): The maximum number of planned
225
+ tasks in one round. (default: :obj:3)
226
+ """
227
+
228
+ def __init__(
229
+ self,
230
+ role_name: str,
231
+ objective: Union[str, TextPrompt],
232
+ model: Optional[BaseModelBackend] = None,
233
+ output_language: Optional[str] = None,
234
+ message_window_size: Optional[int] = None,
235
+ max_task_num: Optional[int] = 3,
236
+ ) -> None:
237
+ task_creation_prompt = TextPrompt(
238
+ """Create new a task with the following objective: {objective}.
239
+ Never forget you are a Task Creator of {role_name}.
240
+ You must instruct me based on my expertise and your needs to solve the task.
241
+ You should consider past solved tasks and in-progress tasks: {task_list}.
242
+ The new created tasks must not overlap with these past tasks.
243
+ The result must be a numbered list in the format:
244
+
245
+ #. First Task
246
+ #. Second Task
247
+ #. Third Task
248
+
249
+ You can only give me up to {max_task_num} tasks at a time. \
250
+ Each task should be concise, concrete and doable for a {role_name}.
251
+ You should make task plan and not ask me questions.
252
+ If you think no new tasks are needed right now, write "No tasks to add."
253
+ Now start to give me new tasks one by one. No more than three tasks.
254
+ Be concrete.
255
+ """
256
+ )
257
+
258
+ self.task_creation_prompt = task_creation_prompt.format(
259
+ objective=objective, role_name=role_name, max_task_num=max_task_num
260
+ )
261
+ self.objective = objective
262
+
263
+ system_message = BaseMessage(
264
+ role_name="Task Creator",
265
+ role_type=RoleType.ASSISTANT,
266
+ meta_dict=None,
267
+ content="You are a helpful task creator.",
268
+ )
269
+
270
+ super().__init__(
271
+ system_message,
272
+ model=model,
273
+ output_language=output_language,
274
+ message_window_size=message_window_size,
275
+ )
276
+
277
+ def run(
278
+ self,
279
+ task_list: List[str],
280
+ ) -> List[str]:
281
+ r"""Generate subtasks based on the previous task results and
282
+ incomplete task list.
283
+
284
+ Args:
285
+ task_list (List[str]): The completed or in-progress
286
+ tasks which should not overlap with new created tasks.
287
+
288
+ Returns:
289
+ List[str]: The new task list generated by the Agent.
290
+ """
291
+
292
+ if len(task_list) > 0:
293
+ task_creation_prompt = self.task_creation_prompt.format(
294
+ task_list=task_list
295
+ )
296
+ else:
297
+ task_creation_prompt = self.task_creation_prompt.format(
298
+ task_list=""
299
+ )
300
+
301
+ task_msg = BaseMessage.make_user_message(
302
+ role_name="Task Creator", content=task_creation_prompt
303
+ )
304
+ task_response = self.step(task_msg)
305
+
306
+ if task_response.terminated:
307
+ raise RuntimeError("Task creation failed.")
308
+ if len(task_response.msgs) == 0:
309
+ raise RuntimeError("Got no task creation message.")
310
+
311
+ sub_tasks_msg = task_response.msgs[0]
312
+ return get_task_list(sub_tasks_msg.content)
313
+
314
+
315
+ @track_agent(name="TaskPrioritizationAgent")
316
+ class TaskPrioritizationAgent(ChatAgent):
317
+ r"""An agent that helps re-prioritize the task list and
318
+ returns numbered prioritized list. Modified from
319
+ `BabyAGI <https://github.com/yoheinakajima/babyagi>`_.
320
+
321
+ Attributes:
322
+ task_prioritization_prompt (TextPrompt): A prompt for the agent to
323
+ prioritize tasks.
324
+
325
+ Args:
326
+ objective (Union[str, TextPrompt]): The objective of the Agent to
327
+ perform the task.
328
+ model (BaseModelBackend, optional): The LLM backend to use for
329
+ generating responses. (default: :obj:`OpenAIModel` with
330
+ `GPT_4O_MINI`)
331
+ output_language (str, optional): The language to be output by the
332
+ agent. (default: :obj:`None`)
333
+ message_window_size (int, optional): The maximum number of previous
334
+ messages to include in the context window. If `None`, no windowing
335
+ is performed. (default: :obj:`None`)
336
+ """
337
+
338
+ def __init__(
339
+ self,
340
+ objective: Union[str, TextPrompt],
341
+ model: Optional[BaseModelBackend] = None,
342
+ output_language: Optional[str] = None,
343
+ message_window_size: Optional[int] = None,
344
+ ) -> None:
345
+ task_prioritization_prompt = TextPrompt(
346
+ """Prioritize the following tasks : {task_list}.
347
+ Consider the ultimate objective of you: {objective}.
348
+ Tasks should be sorted from highest to lowest priority, where higher-priority \
349
+ tasks are those that act as pre-requisites or are more essential for meeting \
350
+ the objective. Return one task per line in your response.
351
+ Do not remove or modify any tasks.
352
+ The result must be a numbered list in the format:
353
+
354
+ #. First task
355
+ #. Second task
356
+
357
+ The entries must be consecutively numbered, starting with 1.
358
+ The number of each entry must be followed by a period.
359
+ Do not include any headers before your ranked list or follow your list \
360
+ with any other output."""
361
+ )
362
+
363
+ self.task_prioritization_prompt = task_prioritization_prompt.format(
364
+ objective=objective
365
+ )
366
+ self.objective = objective
367
+
368
+ system_message = BaseMessage(
369
+ role_name="Task Prioritizer",
370
+ role_type=RoleType.ASSISTANT,
371
+ meta_dict=None,
372
+ content="You are a helpful task prioritizer.",
373
+ )
374
+
375
+ super().__init__(
376
+ system_message,
377
+ model=model,
378
+ output_language=output_language,
379
+ message_window_size=message_window_size,
380
+ )
381
+
382
+ def run(
383
+ self,
384
+ task_list: List[str],
385
+ ) -> List[str]:
386
+ r"""Prioritize the task list given the agent objective.
387
+
388
+ Args:
389
+ task_list (List[str]): The unprioritized tasks of agent.
390
+
391
+ Returns:
392
+ List[str]: The new prioritized task list generated by the Agent.
393
+ """
394
+ task_prioritization_prompt = self.task_prioritization_prompt.format(
395
+ task_list=task_list
396
+ )
397
+
398
+ task_msg = BaseMessage.make_user_message(
399
+ role_name="Task Prioritizer", content=task_prioritization_prompt
400
+ )
401
+
402
+ task_response = self.step(task_msg)
403
+
404
+ if task_response.terminated:
405
+ raise RuntimeError("Task prioritization failed.")
406
+ if len(task_response.msgs) == 0:
407
+ raise RuntimeError("Got no task prioritization message.")
408
+
409
+ sub_tasks_msg = task_response.msgs[0]
410
+ return get_task_list(sub_tasks_msg.content)
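An illustrative sketch (not part of the added file) chaining the task agents defined above; it assumes a default model backend is available:

```python
# Illustrative sketch, not part of task_agent.py: specify a vague task, then
# split the specified task into subtasks. Assumes a default model backend.
from camel.agents.task_agent import TaskPlannerAgent, TaskSpecifyAgent
from camel.types import TaskType

specifier = TaskSpecifyAgent(task_type=TaskType.AI_SOCIETY, word_limit=50)
specified_task = specifier.run(
    task_prompt="Make a poster for a machine learning paper."
)

planner = TaskPlannerAgent()
subtasks = planner.run(task_prompt=specified_task)
print(subtasks)
```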
Paper2Poster/camel/agents/tool_agents/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from .base import BaseToolAgent
15
+ from .hugging_face_tool_agent import HuggingFaceToolAgent
16
+
17
+ __all__ = [
18
+ 'BaseToolAgent',
19
+ 'HuggingFaceToolAgent',
20
+ ]
Paper2Poster/camel/agents/tool_agents/base.py ADDED
@@ -0,0 +1,39 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from camel.agents import BaseAgent
15
+
16
+
17
+ class BaseToolAgent(BaseAgent):
18
+ r"""Creates a :obj:`BaseToolAgent` object with the specified name and
19
+ description.
20
+
21
+ Args:
22
+ name (str): The name of the tool agent.
23
+ description (str): The description of the tool agent.
24
+ """
25
+
26
+ def __init__(self, name: str, description: str) -> None:
27
+ self.name = name
28
+ self.description = description
29
+
30
+ def reset(self) -> None:
31
+ r"""Resets the agent to its initial state."""
32
+ pass
33
+
34
+ def step(self) -> None:
35
+ r"""Performs a single step of the agent."""
36
+ pass
37
+
38
+ def __str__(self) -> str:
39
+ return f"{self.name}: {self.description}"
Paper2Poster/camel/agents/tool_agents/hugging_face_tool_agent.py ADDED
@@ -0,0 +1,206 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ from typing import Any, Optional
15
+
16
+ from camel.agents.tool_agents.base import BaseToolAgent
17
+
18
+
19
+ # flake8: noqa :E501
20
+ class HuggingFaceToolAgent(BaseToolAgent):
21
+ r"""Tool agent for calling HuggingFace models. This agent is a wrapper
22
+ around agents from the `transformers` library. For more information
23
+ about the available models, please see the `transformers` documentation
24
+ at https://huggingface.co/docs/transformers/transformers_agents.
25
+
26
+ Args:
27
+ name (str): The name of the agent.
28
+ *args (Any): Additional positional arguments to pass to the underlying
29
+ Agent class.
30
+ remote (bool, optional): Flag indicating whether to run the agent
31
+ remotely. (default: :obj:`True`)
32
+ **kwargs (Any): Additional keyword arguments to pass to the underlying
33
+ Agent class.
34
+ """
35
+
36
+ def __init__(
37
+ self,
38
+ name: str,
39
+ *args: Any,
40
+ remote: bool = True,
41
+ **kwargs: Any,
42
+ ) -> None:
43
+ try:
44
+ # TODO: Support other tool agents
45
+ import transformers
46
+ from packaging import version
47
+
48
+ if version.parse(transformers.__version__) < version.parse(
49
+ "4.31.0"
50
+ ):
51
+ raise ValueError(
52
+ "The version of \"transformers\" package should >= 4.31.0"
53
+ )
54
+
55
+ from transformers.tools import OpenAiAgent
56
+ from transformers.tools.agent_types import AgentImage
57
+ except (ImportError, ValueError):
58
+ raise ValueError(
59
+ "Could not import transformers tool agents. "
60
+ "Please setup the environment with "
61
+ "pip install huggingface_hub==0.14.1 transformers==4.31.0 diffusers accelerate==0.20.3 datasets torch soundfile sentencepiece opencv-python"
62
+ )
63
+ self.agent_image_type = AgentImage
64
+ self.agent = OpenAiAgent(*args, **kwargs)
65
+ description = f"""The `{name}` is a tool agent that can perform a variety of tasks including:
66
+ - Document question answering: given a document (such as a PDF) in image format, answer a question on this document
67
+ - Text question answering: given a long text and a question, answer the question in the text
68
+ - Unconditional image captioning: Caption the image!
69
+ - Image question answering: given an image, answer a question on this image
70
+ - Image segmentation: given an image and a prompt, output the segmentation mask of that prompt
71
+ - Speech to text: given an audio recording of a person talking, transcribe the speech into text
72
+ - Text to speech: convert text to speech
73
+ - Zero-shot text classification: given a text and a list of labels, identify to which label the text corresponds the most
74
+ - Text summarization: summarize a long text in one or a few sentences
75
+ - Translation: translate the text into a given language
76
+ - Text downloading: to download a text from a web URL
77
+ - Text to image: generate an image according to a prompt, leveraging stable diffusion
78
+ - Image transformation: modify an image given an initial image and a prompt, leveraging instruct pix2pix stable diffusion
79
+ - Text to video: generate a small video according to a prompt
80
+
81
+ Here are some python code examples of what you can do with this agent:
82
+
83
+ Single execution (step) mode, the single execution method is when using the step() method of the agent:
84
+ ```
85
+ # Text to image
86
+ rivers_and_lakes_image = {name}.step("Draw me a picture of rivers and lakes.")
87
+ rivers_and_lakes_image.save("./rivers_and_lakes_image.png")
88
+
89
+ # Text to image -> Image transformation
90
+ sea_add_island_image = {name}.step("Draw me a picture of the sea then transform the picture to add an island")
91
+ sea_add_island_image.save("./sea_add_island_image.png")
92
+
93
+ # If you'd like to keep a state across executions or to pass non-text objects to the agent,
94
+ # you can do so by specifying variables that you would like the agent to use. For example,
95
+ # you could generate the first image of rivers and lakes, and ask the model to update that picture to add an island by doing the following:
96
+ picture = {name}.step("Generate a picture of rivers and lakes.")
97
+ picture.save("./picture.png")
98
+ updated_picture = {name}.step("Transform the image in `picture` to add an island to it.", picture=picture)
99
+ updated_picture.save("./updated_picture.png")
100
+
101
+ capybara_sea_image = {name}.step("Draw me a picture of the `prompt`", prompt="a capybara swimming in the sea")
102
+ capybara_sea_image.save("./capybara_sea_image.png")
103
+
104
+ # Document question answering
105
+ answer = {name}.step(
106
+ "In the following `document`, where will the TRRF Scientific Advisory Council Meeting take place?",
107
+ document=document,
108
+ )
109
+ print(answer)
110
+
111
+
112
+ # Text to image
113
+ boat_image = {name}.step("Generate an image of a boat in the water")
114
+ boat_image.save("./boat_image.png")
115
+
116
+ # Unconditional image captioning
117
+ boat_image_caption = {name}.step("Can you caption the `boat_image`?", boat_image=boat_image)
118
+ print(boat_image_caption)
119
+
120
+ # Text to image -> Unconditional image captioning -> Text to speech
121
+ boat_audio = {name}.step("Can you generate an image of a boat? Please read out loud the contents of the image afterwards")
122
+
123
+ # Text downloading
124
+ document = {name}.step("Download the text from http://hf.co")
125
+ print(document)
126
+
127
+ # Text summarization
128
+ summary = {name}.step("Summarize the following text: `document`", document=document)
129
+ print(summary)
130
+
131
+ # Text downloading -> Text summarization -> Text to speech
132
+ audio = {name}.step("Read out loud the summary of http://hf.co")
133
+ ```
134
+
135
+ Chat-based execution (chat), the agent also has a chat-based approach, using the chat() method:
136
+ ```
137
+ # Clean the chat history
138
+ {name}.reset()
139
+
140
+ # Text to image
141
+ capybara_image = {name}.chat("Show me an an image of a capybara")
142
+ capybara_image.save("./capybara_image.png")
143
+
144
+ # Image transformation
145
+ transformed_capybara_image = {name}.chat("Transform the image so that it snows")
146
+ transformed_capybara_image.save("./transformed_capybara_image.png")
147
+
148
+ # Image segmentation
149
+ segmented_transformed_capybara_image = {name}.chat("Show me a mask of the snowy capybaras")
150
+ segmented_transformed_capybara_image.save("./segmented_transformed_capybara_image.png")
151
+ ```
152
+ """
153
+ super(HuggingFaceToolAgent, self).__init__(name, description)
154
+ self.remote = remote
155
+
156
+ def reset(self) -> None:
157
+ r"""Resets the chat history of the agent."""
158
+ self.agent.prepare_for_new_chat()
159
+
160
+ def step(
161
+ self,
162
+ *args: Any,
163
+ remote: Optional[bool] = None,
164
+ **kwargs: Any,
165
+ ) -> Any:
166
+ r"""Runs the agent in single execution mode.
167
+
168
+ Args:
169
+ *args (Any): Positional arguments to pass to the agent.
170
+ remote (bool, optional): Flag indicating whether to run the agent
171
+ remotely. Overrides the default setting. (default: :obj:`None`)
172
+ **kwargs (Any): Keyword arguments to pass to the agent.
173
+
174
+ Returns:
175
+ str: The response from the agent.
176
+ """
177
+ if remote is None:
178
+ remote = self.remote
179
+ agent_output = self.agent.run(*args, remote=remote, **kwargs)
180
+ if isinstance(agent_output, self.agent_image_type):
181
+ agent_output = agent_output.to_raw()
182
+ return agent_output
183
+
184
+ def chat(
185
+ self,
186
+ *args: Any,
187
+ remote: Optional[bool] = None,
188
+ **kwargs: Any,
189
+ ) -> Any:
190
+ r"""Runs the agent in a chat conversation mode.
191
+
192
+ Args:
193
+ *args (Any): Positional arguments to pass to the agent.
194
+ remote (bool, optional): Flag indicating whether to run the agent
195
+ remotely. Overrides the default setting. (default: :obj:`None`)
196
+ **kwargs (Any): Keyword arguments to pass to the agent.
197
+
198
+ Returns:
199
+ str: The response from the agent.
200
+ """
201
+ if remote is None:
202
+ remote = self.remote
203
+ agent_output = self.agent.chat(*args, remote=remote, **kwargs)
204
+ if isinstance(agent_output, self.agent_image_type):
205
+ agent_output = agent_output.to_raw()
206
+ return agent_output
Paper2Poster/camel/benchmarks/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from .apibank import APIBankBenchmark
16
+ from .apibench import APIBenchBenchmark
17
+ from .base import BaseBenchmark
18
+ from .gaia import DefaultGAIARetriever, GAIABenchmark
19
+ from .nexus import NexusBenchmark
20
+ from .ragbench import RAGBenchBenchmark
21
+
22
+ __all__ = [
23
+ "BaseBenchmark",
24
+ "GAIABenchmark",
25
+ "DefaultGAIARetriever",
26
+ "NexusBenchmark",
27
+ "APIBenchBenchmark",
28
+ "APIBankBenchmark",
29
+ "RAGBenchBenchmark",
30
+ ]
Paper2Poster/camel/benchmarks/apibank.py ADDED
@@ -0,0 +1,565 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import json
16
+ import logging
17
+ import os
18
+ import random
19
+ import re
20
+ import sys
21
+ from pathlib import Path
22
+ from typing import Any, Dict, List, Literal, Optional
23
+
24
+ import numpy as np
25
+ from rouge import Rouge
26
+ from tqdm import tqdm
27
+
28
+ from camel.agents import ChatAgent
29
+ from camel.benchmarks.base import BaseBenchmark
30
+ from camel.messages import BaseMessage
31
+ from camel.utils import download_github_subdirectory
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # Add current folder to sys.path to enable relative import
36
+ current_folder = os.getcwd()
37
+ if current_folder not in sys.path:
38
+ sys.path.append(current_folder)
39
+
40
+
41
+ def process_messages(
42
+ chat_history: List[Dict[str, Any]],
43
+ prompt: str,
44
+ ) -> List[Dict[str, str]]:
45
+ """
46
+ Processes chat history into a structured format for further use.
47
+
48
+ Args:
49
+ chat_history (List[Dict[str, Any]]):
50
+ A list of dictionaries representing the chat history.
51
+ prompt (str): A prompt to be set as the system message.
52
+
53
+ Returns:
54
+ List[Dict[str, str]]: A list of dictionaries representing
55
+ the processed messages, where each dictionary has:
56
+ - 'role': The role of the message ('system', 'user', or 'assistant').
57
+ - 'content': The content of the message, including formatted
58
+ API responses when applicable.
59
+ """
60
+ messages = [{'role': 'system', 'content': prompt}]
61
+ for item in chat_history:
62
+ role_map = {'User': 'user', 'AI': 'assistant', 'API': 'system'}
63
+ chat_role = role_map.get(
64
+ item['role'], 'unknown'
65
+ ) # default role to 'unknown'
66
+ if item['role'] == 'API':
67
+ chat_content = '[{}({})] Response: {}'.format(
68
+ item['api_name'],
69
+ ', '.join(
70
+ [
71
+ '{}=\'{}\''.format(k, v)
72
+ for k, v in item['param_dict'].items()
73
+ ]
74
+ ),
75
+ str(item['result']['output']),
76
+ )
77
+ else:
78
+ chat_content = item['text']
79
+ messages.append({'role': chat_role, 'content': chat_content})
80
+ return messages
81
+
82
+
83
+ class APIBankBenchmark(BaseBenchmark):
84
+ r"""API-Bank Benchmark adapted from `API-Bank:
85
+ A Comprehensive Benchmark for Tool-Augmented LLMs`
86
+ <https://github.com/AlibabaResearch/DAMO-ConvAI/tree/main/api-bank>.
87
+
88
+ Args:
89
+ save_to (str): The file to save the results.
90
+ processes (int, optional): The number of processes to use.
91
+ (default: :obj:`1`)
92
+ """
93
+
94
+ def __init__(
95
+ self,
96
+ save_to: str,
97
+ processes: int = 1,
98
+ ):
99
+ r"""Initialize the APIBank benchmark.
100
+
101
+ Args:
102
+ save_to (str): The file to save the results.
103
+ processes (int, optional): The number of processes to use for
104
+ parallel processing. (default: :obj:`1`)
105
+ """
106
+ # Predefine data_dir for better import management
107
+ super().__init__("apibank", "api_bank", save_to, processes)
108
+ self._data: Dict[str, List[APIBankSample]] = dict() # type: ignore[assignment]
109
+
110
+ def download(self):
111
+ r"""Download APIBank dataset and code from Github."""
112
+
113
+ repo = "AlibabaResearch/DAMO-ConvAI"
114
+ subdir = "api-bank"
115
+ data_dir = self.data_dir
116
+
117
+ download_github_subdirectory(repo, subdir, data_dir)
118
+
119
+ sys.path.insert(0, self.data_dir)
120
+ logger.info("Download completed.")
121
+
122
+ def load(self, level: str, force_download: bool = False): # type: ignore[override]
123
+ r"""Load the APIBank Benchmark dataset.
124
+
125
+ Args:
126
+ level (str): Level to run benchmark on.
127
+ force_download (bool, optional): Whether to
128
+ force download the data.
129
+ """
130
+ if force_download:
131
+ logger.info("Force downloading data.")
132
+ self.download()
133
+
134
+ if level == "level-1":
135
+ file_path = Path("api_bank/lv1-lv2-samples/level-1-given-desc")
136
+ elif level == 'level-2':
137
+ file_path = Path("api_bank/lv1-lv2-samples/level-2-toolsearcher")
138
+ jsonl_files = [
139
+ f for f in os.listdir(file_path) if f.endswith('.jsonl')
140
+ ]
141
+ for file in tqdm(jsonl_files, desc="Processing files"):
142
+ history = []
143
+ with open(file_path / file, 'r') as f:
144
+ for line in f:
145
+ history.append(json.loads(line))
146
+ samples = APIBankSample.from_chat_history(history)
147
+ self._data[file.rsplit('.', 1)[0]] = samples
148
+
149
+ # Change import to relative import in the downloaded python files
150
+ def process_files(folder_path, replacements):
151
+ r"""Replace absolute imports in downloaded files with
152
+ relative import."""
153
+ for file in os.listdir(folder_path):
154
+ if file.endswith(".py"):
155
+ file_path = os.path.join(folder_path, file)
156
+ try:
157
+ with open(file_path, "r", encoding="utf-8") as file:
158
+ content = file.read()
159
+
160
+ original_content = content
161
+
162
+ for pattern, replacement in replacements:
163
+ content = re.sub(pattern, replacement, content)
164
+
165
+ if content != original_content:
166
+ with open(
167
+ file_path, "w", encoding="utf-8"
168
+ ) as file:
169
+ file.write(content)
170
+ logger.info(f"Updated file: {file_path}")
171
+
172
+ except Exception as e:
173
+ logger.info(f"Error processing file {file_path}: {e}")
174
+
175
+ api_bank_folder = "api_bank"
176
+ apis_folder = os.path.join(api_bank_folder, "apis")
177
+
178
+ apis_replacements = [
179
+ (r"from apis.api", "from .api"),
180
+ (r"from apis import", "from .api import"),
181
+ ]
182
+
183
+ api_bank_replacements = [
184
+ (r"from apis", "from .apis"),
185
+ (r"from api_call_extraction", "from .api_call_extraction"),
186
+ (r"f'{basename}", r"f'api_bank.{basename}"),
187
+ ]
188
+
189
+ process_files(apis_folder, apis_replacements)
190
+ process_files(api_bank_folder, api_bank_replacements)
191
+
192
+ def run( # type: ignore[override, return]
193
+ self,
194
+ agent: ChatAgent,
195
+ level: Literal["level-1", "level-2"],
196
+ api_test_enabled=True,
197
+ randomize: bool = False,
198
+ subset: Optional[int] = None,
199
+ ) -> Dict[str, Any]:
200
+ r"""Run the benchmark.
201
+
202
+ Args:
203
+ agent (ChatAgent): The agent to run the
204
+ benchmark.
205
+ level (Literal['level-1', 'level-2']):
206
+ The level to run the benchmark on.
207
+ randomize (bool, optional): Whether to
208
+ randomize the data.
209
+ api_test_enabled (bool): Whether to test
210
+ API calling (`True`) or response (`False`)
211
+ (default: :obj:`True`)
212
+ subset (Optional[int], optional):
213
+ The subset of data to run.
214
+ (default: :obj:`None`)
215
+
216
+ Returns:
217
+ Dict[str, Any]: The results of the benchmark.
218
+ """
219
+ logger.info(f"Running APIBench benchmark on {level}.")
220
+ self.load(level)
221
+ datas = self._data
222
+
223
+ # Shuffle and subset data if necessary
224
+ if randomize:
225
+ randomized_items = list(datas.items())
226
+ random.shuffle(randomized_items)
227
+ datas = dict(randomized_items)
228
+ if subset:
229
+ datas = dict(list(datas.items())[:subset])
230
+
231
+ logger.info(f"Number of tasks: {len(datas)}")
232
+
233
+ # Initialize results storage
234
+ self._results = []
235
+
236
+ # The following code is adapted from the evaluator
237
+ # from the original repo:
238
+ tool_search_enabled = level == "level-2"
239
+ dialog_test_enabled = not api_test_enabled
240
+ total_api_calls, correct_api_calls, rougel_scores = 0, 0, []
241
+
242
+ with open(self.save_to, "w") as f:
243
+ for test in tqdm(datas, desc="Running"):
244
+ samples = self._data[test]
245
+ evaluator = Evaluator(samples) # type: ignore[arg-type]
246
+
247
+ for sample_id in evaluator.get_all_sample_ids():
248
+ # Process sample and generate response
249
+ sample = evaluator.dataset[sample_id]
250
+
251
+ if (
252
+ sample.ground_truth['role'] == 'API'
253
+ and api_test_enabled
254
+ ):
255
+ if tool_search_enabled:
256
+ _, chat_history = evaluator.get_model_input(
257
+ sample_id
258
+ )
259
+ api_descriptions = evaluator.get_api_description(
260
+ 'ToolSearcher'
261
+ )
262
+ else:
263
+ api_descriptions, chat_history = (
264
+ evaluator.get_model_input(sample_id)
265
+ )
266
+ messages = process_messages(
267
+ chat_history, API_CALL_PROMPT + api_descriptions
268
+ )
269
+ model_output = agent_call(messages, agent)
270
+ api_call = get_api_call(model_output)
271
+
272
+ # Evaluate API call
273
+ if api_call:
274
+ try:
275
+ correct, model_output_result = (
276
+ evaluator.evaluate(sample_id, api_call)
277
+ )
278
+ except AssertionError as e:
279
+ if 'The API name is not correct.' not in str(
280
+ e
281
+ ):
282
+ raise e
283
+ logging.info('AssertionError: {}'.format(e))
284
+ correct = False
285
+ else:
286
+ model_output_result = 'No API call found'
287
+ correct = False
288
+ if correct:
289
+ correct_api_calls += 1
290
+ logging.info(
291
+ 'Correct API call: {} Ground truth: {}'.format(
292
+ api_call, sample.ground_truth
293
+ )
294
+ )
295
+ else:
296
+ logging.info(
297
+ 'Incorrect model output: {} Result: {} \
298
+ Ground truth: {} File: {} Sample ID: {} \
299
+ Messages: {}'.format(
300
+ model_output.replace('\n', ' '),
301
+ model_output_result,
302
+ sample.ground_truth,
303
+ test,
304
+ sample_id,
305
+ messages[1:],
306
+ )
307
+ )
308
+ total_api_calls += 1
309
+ self._results.append(
310
+ {
311
+ 'Role': 'API',
312
+ 'Model_output': model_output,
313
+ 'Model_output_result': model_output_result,
314
+ 'Ground_truth': sample.ground_truth,
315
+ 'Test': test,
316
+ 'Correct': correct,
317
+ }
318
+ )
319
+ f.write(json.dumps(self._results[-1], indent=2) + "\n")
320
+
321
+ elif (
322
+ sample.ground_truth['role'] == 'AI'
323
+ and dialog_test_enabled
324
+ ):
325
+ # Process sample and generate response
326
+ api_descriptions, chat_history = (
327
+ evaluator.get_model_input(sample_id)
328
+ )
329
+
330
+ messages = process_messages(
331
+ chat_history, RESPONSE_PROMPT + api_descriptions
332
+ )
333
+ model_output = agent_call(messages, agent)
334
+
335
+ # Evaluate model response
336
+ if model_output:
337
+ score = evaluator.evaluate(sample_id, model_output)
338
+ else:
339
+ score = 0
340
+ rougel_scores.append(score)
341
+ if score < 0.2:
342
+ logging.info(
343
+ 'Low score: {} Score: {} Ground truth: {} \
344
+ Test: {} Sample ID: {} \
345
+ Messages: {}'.format(
346
+ model_output.replace('\n', ' '),
347
+ score,
348
+ sample.ground_truth,
349
+ test,
350
+ sample_id,
351
+ messages[1:],
352
+ )
353
+ )
354
+
355
+ self._results.append(
356
+ {
357
+ 'Role': 'AI',
358
+ 'Model_output': model_output,
359
+ 'Score': score,
360
+ 'Ground_truth': sample.ground_truth,
361
+ 'Test': test,
362
+ }
363
+ )
364
+ f.write(json.dumps(self._results[-1], indent=2) + "\n")
365
+
366
+ f.flush()
367
+
368
+ if api_test_enabled:
369
+ return {
370
+ 'total': total_api_calls,
371
+ 'correct': correct_api_calls,
372
+ "accuracy": correct_api_calls / total_api_calls
373
+ if total_api_calls
374
+ else 0,
375
+ }
376
+ elif dialog_test_enabled:
377
+ return {'Dialog_score': np.mean(rougel_scores)}
378
+
379
+
380
+ # The following code is migrated from the original repo:
381
+ # https://github.com/AlibabaResearch/DAMO-ConvAI/tree/main/api-bank
382
+ def agent_call(messages: List[Dict], agent: ChatAgent):
383
+ r"""Add messages to agent memory and get response."""
384
+ for i, msg in enumerate(messages):
385
+ if msg['role'] == 'user':
386
+ message = BaseMessage.make_user_message(
387
+ role_name="CAMEL User", content=msg['content']
388
+ )
389
+ elif msg['role'] == 'assistant':
390
+ message = BaseMessage.make_assistant_message(
391
+ role_name="CAMEL Assistant", content=msg['content']
392
+ )
393
+ elif msg['role'] == 'system':
394
+ message = BaseMessage.make_assistant_message(
395
+ role_name="System", content=msg['content']
396
+ )
397
+ else:
398
+ raise ValueError(f"Unrecognized role: {msg['role']}")
399
+
400
+ if i == len(messages) - 1:
401
+ break
402
+ agent.record_message(message)
403
+
404
+ response = agent.step(message)
405
+ model_output = response.msgs[0].content
406
+ agent.reset()
407
+ return model_output
408
+
409
+
410
+ def calculate_rouge_l_score(reference, hypothesis):
411
+ r"""Calculate rouge l score between hypothesis and reference."""
412
+ rouge = Rouge()
413
+ scores = rouge.get_scores(hypothesis, reference)
414
+ rouge_l_score = scores[0]['rouge-l']['f']
415
+ return rouge_l_score
416
+
417
+
418
+ def get_api_call(model_output):
419
+ r"""Parse api call from model output."""
420
+ api_call_pattern = r"\[(\w+)\((.*)\)\]"
421
+ api_call_pattern = re.compile(api_call_pattern)
422
+ match = api_call_pattern.search(model_output)
423
+ if match:
424
+ return match.group(0)
425
+ else:
426
+ return None
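A hypothetical example (not part of the added file) of what `get_api_call` extracts; the API name below is made up, and note that the whole bracketed call, brackets included, is returned:

```python
# Illustrative only: get_api_call keeps the full "[ApiName(...)]" span, or None.
output = "Sure, calling it now. [GetWeather(city='Paris', unit='C')]"
print(get_api_call(output))            # [GetWeather(city='Paris', unit='C')]
print(get_api_call("No call here."))   # None
```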
427
+
428
+
429
+ class APIBankSample:
430
+ r"""APIBank sample used to load the datasets."""
431
+
432
+ def __init__(self, chat_history, apis, ground_truth):
433
+ self.chat_history = chat_history
434
+ self.apis = apis
435
+ self.ground_truth = ground_truth
436
+
437
+ def __repr__(self):
438
+ return 'Sample(chat_history={}, apis={}, ground_truth={})'.format(
439
+ self.chat_history, self.apis, self.ground_truth
440
+ )
441
+
442
+ @classmethod
443
+ def from_chat_history(cls, chat_history):
444
+ apis = set()
445
+ api_positions = []
446
+ for i, item in enumerate(chat_history):
447
+ if item['role'] == 'API':
448
+ apis.add(item['api_name'])
449
+ api_positions.append(i)
450
+
451
+ samples = []
452
+ for i in api_positions:
453
+ sample = cls(chat_history[:i], apis, chat_history[i])
454
+ samples.append(sample)
455
+ sample = cls(chat_history[: i + 1], apis, chat_history[i + 1])
456
+ samples.append(sample)
457
+
458
+ return samples
459
+
460
+
461
+ class Evaluator:
462
+ r"""Evaluator for APIBank benchmark."""
463
+
464
+ def __init__(self, samples: List[APIBankSample]):
465
+ # Placeholder for the import, as the import
466
+ # only works after the files have been downloaded
467
+ try:
468
+ from api_bank.tool_manager import ( # type: ignore[import-not-found]
469
+ ToolManager,
470
+ )
471
+ except Exception as e:
472
+ logger.info(f"{e}, Module will be imported after download.")
473
+ self.dataset = samples
474
+ self.sample_ids = list(range(len(self.dataset)))
475
+ os.chdir("api_bank")
476
+ self.tool_manager = ToolManager("apis")
477
+ os.chdir("..")
478
+
479
+ def get_all_sample_ids(self):
480
+ return self.sample_ids
481
+
482
+ def get_api_description(self, api_name):
483
+ return self.tool_manager.get_api_description(api_name)
484
+
485
+ def get_model_input(self, sample_id: int):
486
+ sample = self.dataset[sample_id]
487
+ apis = sample.apis
488
+ chat_history = sample.chat_history
489
+ api_descriptions = []
490
+ for api_name in apis:
491
+ api_descriptions.append(
492
+ self.tool_manager.get_api_description(api_name)
493
+ )
494
+ api_description = '\n'.join(api_descriptions)
495
+ return api_description, chat_history
496
+
497
+ def evaluate(self, sample_id, model_output):
498
+ try:
499
+ from api_bank.api_call_extraction import ( # type: ignore[import-not-found]
500
+ parse_api_call,
501
+ )
502
+ except Exception as e:
503
+ logger.info(f"{e}, Module will be imported after download.")
504
+ sample = self.dataset[sample_id]
505
+ ground_truth = sample.ground_truth
506
+ if ground_truth['role'] == 'API':
507
+ api_name, param_dict = parse_api_call(model_output)
508
+ if api_name != ground_truth['api_name']:
509
+ return False, 'API Name Mismatch: {} vs {}'.format(
510
+ api_name, ground_truth['api_name']
511
+ )
512
+ try:
513
+ result = self.tool_manager.api_call(api_name, **param_dict)
514
+ except Exception as e:
515
+ return False, str(e)
516
+ api = self.tool_manager.init_tool(api_name)
517
+ try:
518
+ correct = api.check_api_call_correctness(
519
+ result, ground_truth['result']
520
+ )
521
+ except KeyError:
522
+ correct = False
523
+ result = 'KeyError' + str(result)
524
+ return correct, result
525
+ elif ground_truth['role'] == 'AI':
526
+ score = calculate_rouge_l_score(ground_truth['text'], model_output)
527
+ return round(score, 4)
528
+
529
+
530
+ API_CALL_PROMPT = '''
531
+ Based on the given API description and the existing \
532
+ conversation history 1..t, please generate the API request \
533
+ that the AI should call in step t+1 and output it in the \
534
+ format of [ApiName(key1='value1', key2='value2', ...)], \
535
+ replace the ApiName with the actual API name, and \
536
+ replace the key and value with the actual parameters. \
537
+ Your output should start with a square bracket "[" \
538
+ and end with a square bracket "]". Do not output any \
539
+ other explanation or prompt or the result of the API call in your output.
540
+ This year is 2023.
541
+ Input:
542
+ User: [User's utterance]
543
+ AI: [AI's utterance]
544
+
545
+ Expected output:
546
+ [ApiName(key1='value1', key2='value2', ...)]
547
+
548
+ API descriptions:
549
+ '''
550
+
551
+ RESPONSE_PROMPT = '''
552
+ Based on the given API description and the existing \
553
+ conversation history 1..t, please generate the next \
554
+ dialog that the AI should response after the API call t.
555
+ This year is 2023.
556
+ Input:
557
+ User: [User's utterance]
558
+ AI: [AI's utterance]
559
+ [ApiName(key1='value1', key2='value2', …)]
560
+
561
+ Expected output:
562
+ AI: [AI's utterance]
563
+
564
+ API descriptions:
565
+ '''
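A hedged end-to-end sketch (not part of the added file) of running the benchmark; it assumes a `ChatAgent` has already been constructed elsewhere and that GitHub is reachable for the one-time data download:

```python
# Illustrative sketch, not part of apibank.py: download the data once, then run
# the level-1 API-call test on a handful of dialogues. `chat_agent` is assumed
# to be a previously constructed camel ChatAgent.
from camel.benchmarks import APIBankBenchmark

benchmark = APIBankBenchmark(save_to="apibank_results.jsonl")
benchmark.download()                  # fetches the api-bank subdirectory
results = benchmark.run(
    agent=chat_agent,
    level="level-1",
    api_test_enabled=True,
    subset=5,
)
print(results["accuracy"])
```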
Paper2Poster/camel/benchmarks/apibench.py ADDED
@@ -0,0 +1,500 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import json
16
+ import logging
17
+ import random
18
+ from pathlib import Path
19
+ from typing import Any, Dict, Literal, Optional
20
+
21
+ import tree_sitter_python as tspython
22
+ from tqdm import tqdm
23
+ from tree_sitter import Language, Parser
24
+
25
+ from camel.agents import ChatAgent
26
+ from camel.benchmarks.base import BaseBenchmark
27
+ from camel.messages import BaseMessage
28
+ from camel.utils import download_github_subdirectory
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ # Mapping of dataset names to file names
34
+ # 'Oracle' retriver used here which means all the full
35
+ # API documentation will be included in the prompt
36
+ dataset_mapping = {
37
+ "huggingface": {
38
+ "api": "huggingface_api.jsonl",
39
+ "eval": "huggingface_eval.json",
40
+ "train": "huggingface_train.json",
41
+ "questions": "questions_huggingface_oracle.jsonl",
42
+ },
43
+ "tensorflowhub": {
44
+ "api": "tensorflowhub_api.jsonl",
45
+ "eval": "tensorflow_eval.json",
46
+ "train": "tensorflow_train.json",
47
+ "questions": "questions_tensorflowhub_oracle.jsonl",
48
+ },
49
+ "torchhub": {
50
+ "api": "torchhub_api.jsonl",
51
+ "eval": "torchhub_eval.json",
52
+ "train": "torchhub_train.json",
53
+ "questions": "questions_torchhub_oracle.jsonl",
54
+ },
55
+ }
56
+
57
+
58
+ # This function is migrated from the original repo:
59
+ # https://github.com/ShishirPatil/gorilla
60
+ def encode_question(question: str, dataset_name: str) -> str:
61
+ r"""Encode multiple prompt instructions into a single string."""
62
+
63
+ if dataset_name == "torchhub":
64
+ domains = "1. $DOMAIN is inferred from the task description and \
65
+ should include one of {Classification, Semantic Segmentation, \
66
+ Object Detection, Audio Separation, Video Classification, \
67
+ Text-to-Speech}."
68
+ elif dataset_name == "huggingface":
69
+ domains = "1. $DOMAIN should include one of {Multimodal Feature \
70
+ Extraction, Multimodal Text-to-Image, Multimodal \
71
+ Image-to-Text, Multimodal Text-to-Video, \
72
+ Multimodal Visual Question Answering, Multimodal Document \
73
+ Question Answer, Multimodal Graph Machine Learning, \
74
+ Computer Vision Depth Estimation, Computer Vision Image \
75
+ Classification, Computer Vision Object Detection, \
76
+ Computer Vision Image Segmentation, Computer Vision \
77
+ Image-to-Image, Computer Vision Unconditional \
78
+ Image Generation, Computer Vision Video Classification, \
79
+ Computer Vision Zero-Shot Image Classification, \
80
+ Natural Language Processing Text Classification, \
81
+ Natural Language Processing Token Classification, \
82
+ Natural Language Processing Table Question Answering, \
83
+ Natural Language Processing Question Answering, \
84
+ Natural Language Processing Zero-Shot Classification, \
85
+ Natural Language Processing Translation, Natural Language \
86
+ Processing Summarization, Natural Language Processing \
87
+ Conversational, Natural Language Processing Text \
88
+ Generation, Natural Language Processing Fill-Mask, \
89
+ Natural Language Processing Text2Text Generation, \
90
+ Natural Language Processing Sentence Similarity, \
91
+ Audio Text-to-Speech, Audio Automatic Speech Recognition, \
92
+ Audio Audio-to-Audio, Audio Audio Classification, \
93
+ Audio Voice Activity Detection, Tabular Tabular \
94
+ Classification, Tabular Tabular Regression, \
95
+ Reinforcement Learning Reinforcement Learning, \
96
+ Reinforcement Learning Robotics }"
97
+ elif dataset_name == "tensorflowhub":
98
+ domains = "1. $DOMAIN is inferred from the task description \
99
+ and should include one of {text-sequence-alignment, \
100
+ text-embedding, text-language-model, text-preprocessing, \
101
+ text-classification, text-generation, text-question-answering, \
102
+ text-retrieval-question-answering, text-segmentation, \
103
+ text-to-mel, image-classification, image-feature-vector, \
104
+ image-object-detection, image-segmentation, \
105
+ image-generator, image-pose-detection, image-rnn-agent, \
106
+ image-augmentation, image-classifier, image-style-transfer, \
107
+ image-aesthetic-quality, image-depth-estimation, \
108
+ image-super-resolution, image-deblurring, image-extrapolation, \
109
+ image-text-recognition, image-dehazing, image-deraining, \
110
+ image-enhancemenmt, image-classification-logits, \
111
+ image-frame-interpolation, image-text-detection, image-denoising, \
112
+ image-others, video-classification, video-feature-extraction, \
113
+ video-generation, video-audio-text, video-text, \
114
+ audio-embedding, audio-event-classification, audio-command-detection, \
115
+ audio-paralinguists-classification, audio-speech-to-text, \
116
+ audio-speech-synthesis, audio-synthesis, audio-pitch-extraction}"
117
+ else:
118
+ logger.info("Error: API name is not supported.")
119
+
120
+ prompt = (
121
+ question
122
+ + "\nWrite a python program in 1 to 2 lines to call API in "
123
+ + dataset_name
124
+ + ".\n\nThe answer should follow the format: <<<domain>>> $DOMAIN, \
125
+ <<<api_call>>>: $API_CALL, <<<api_provider>>>: $API_PROVIDER, \
126
+ <<<explanation>>>: $EXPLANATION, <<<code>>>: $CODE}. \
127
+ Here are the requirements:\n"
128
+ + domains
129
+ + "\n2. The $API_CALL should have only 1 line of code \
130
+ that calls api.\n 3. The $API_PROVIDER should be the \
131
+ programming framework used.\n4. $EXPLANATION should be \
132
+ a step-by-step explanation.\n5. The $CODE is the python code.\n6. \
133
+ Do not repeat the format in your answer."
134
+ )
135
+ return prompt
136
+
137
+
138
+ class APIBenchBenchmark(BaseBenchmark):
139
+ r"""APIBench Benchmark adopted from `Gorilla: Large Language Model
140
+ Connected with Massive APIs`
141
+ <https://huggingface.co/datasets/gorilla-llm/APIBench>.
142
+
143
+ Args:
144
+ data_dir (str): The directory to save the data.
145
+ save_to (str): The file to save the results.
146
+ processes (int, optional): The number of processes to use.
147
+ (default: :obj:`1`)
148
+ """
149
+
150
+ # TODO: Integrate retriever (pending)
151
+
152
+ def __init__(
153
+ self,
154
+ data_dir: str,
155
+ save_to: str,
156
+ processes: int = 1,
157
+ ):
158
+ r"""Initialize the APIBench benchmark.
159
+
160
+ Args:
161
+ data_dir (str): The directory to save the data.
162
+ save_to (str): The file to save the results.
163
+ processes (int, optional): The number of processes to use for
164
+ parallel processing. (default: :obj:`1`)
165
+ """
166
+ super().__init__("apibench", data_dir, save_to, processes)
167
+
168
+ def download(self):
169
+ r"""Download the APIBench dataset."""
170
+ from huggingface_hub import snapshot_download
171
+
172
+ snapshot_download(
173
+ repo_id="gorilla-llm/APIBench",
174
+ repo_type="dataset",
175
+ local_dir=self.data_dir,
176
+ local_dir_use_symlinks=True,
177
+ )
178
+
179
+ repo = "ShishirPatil/gorilla"
180
+ subdir = "/gorilla/eval/eval-data/questions"
181
+ data_dir = self.data_dir
182
+
183
+ download_github_subdirectory(repo, subdir, data_dir)
184
+
185
+ def load(self, dataset_name: str, force_download: bool = False): # type: ignore[override]
186
+ r"""Load the APIBench Benchmark dataset.
187
+
188
+ Args:
189
+ dataset_name (str): Name of the specific dataset to be loaded.
190
+ force_download (bool, optional): Whether to force
191
+ download the data. (default: :obj:`False`)
192
+ """
193
+
194
+ if force_download:
195
+ logger.info("Force downloading data.")
196
+ self.download()
197
+
198
+ def load_json_lines(file_path: Path):
199
+ r"""Helper function to load JSON lines from a file."""
200
+ try:
201
+ with open(file_path, "r") as f:
202
+ return [json.loads(line) for line in f]
203
+ except FileNotFoundError:
204
+ raise FileNotFoundError(f"File not found: {file_path}")
205
+ except json.JSONDecodeError as e:
206
+ raise ValueError(
207
+ f"Error decoding JSON in file {file_path}: {e}"
208
+ )
209
+
210
+ dataset_path = self.data_dir / dataset_name
211
+ if not dataset_path.exists():
212
+ raise FileNotFoundError(
213
+ f"Dataset directory does not exist: {dataset_path}"
214
+ )
215
+
216
+ for label in ['api', 'eval', 'questions']:
217
+ file_name = dataset_mapping[dataset_name][label]
218
+ file_path = (
219
+ dataset_path / file_name
220
+ if label == 'questions'
221
+ else self.data_dir / file_name
222
+ )
223
+
224
+ # Load data based on label type
225
+ if label in ['api', 'questions', 'eval']:
226
+ data = load_json_lines(file_path)
227
+
228
+ if label == 'eval':
229
+ # Extract 'api_data' specifically for eval label
230
+ data = [item['api_data'] for item in data]
231
+
232
+ self._data[label] = data
233
+ else:
234
+ raise ValueError(f"Unknown label: {label}")
235
+
236
+ ast_database = []
237
+ for data in self._data['api']:
238
+ ast_tree = ast_parse(data['api_call'])
239
+ ast_database.append(ast_tree)
240
+ self._data['ast'] = ast_database
241
+
242
+ def run( # type: ignore[override]
243
+ self,
244
+ agent: ChatAgent,
245
+ dataset_name: Literal["huggingface", "tensorflowhub", "torchhub"],
246
+ randomize: bool = False,
247
+ subset: Optional[int] = None,
248
+ ) -> Dict[str, Any]:
249
+ r"""Run the benchmark.
250
+
251
+ Args:
252
+ agent (ChatAgent): The agent to run the
253
+ benchmark.
254
+ dataset_name (Literal["huggingface",
255
+ "tensorflowhub", "torchhub"]):
256
+ The dataset to run the benchmark.
257
+ randomize (bool, optional): Whether to randomize the data.
258
+ (default: :obj:`False`)
259
+ subset (Optional[int], optional): The subset of data to run.
260
+ (default: :obj:`None`)
261
+ """
262
+
263
+ if dataset_name not in dataset_mapping:
264
+ raise ValueError(f"Invalid value for dataset: {dataset_name}.")
265
+
266
+ logger.info(f"Running APIBench benchmark on {dataset_name}.")
267
+ self.load(dataset_name)
268
+ datas = self._data['questions']
269
+
270
+ # Shuffle and subset data if necessary
271
+ if randomize:
272
+ random.shuffle(datas)
273
+ if subset:
274
+ datas = datas[:subset]
275
+
276
+ logger.info(f"Number of tasks: {len(datas)}")
277
+
278
+ # Initialize results storage
279
+ self._results = []
280
+
281
+ with open(self.save_to, "w") as f:
282
+ for question in tqdm(datas, desc="Running"):
283
+ prompt = encode_question(question["text"], dataset_name)
284
+ msg = BaseMessage.make_user_message(
285
+ role_name="User", content=prompt
286
+ )
287
+ try:
288
+ # Generate response
289
+ responses = agent.step(msg)
290
+ response = responses.msgs[0].content
291
+ api_database = self._data['api']
292
+ qa_pairs = self._data['eval']
293
+ ast_database = self._data['ast']
294
+ question_id = question['question_id']
295
+
296
+ # Evaluate response
297
+ error, correct, hallucination = evaluate_response(
298
+ response,
299
+ question_id,
300
+ dataset_name,
301
+ api_database,
302
+ qa_pairs,
303
+ ast_database,
304
+ )
305
+ self._results.append(
306
+ {
307
+ "question": question,
308
+ "agent_response": response,
309
+ "correct": correct,
310
+ "hallucination": hallucination,
311
+ "error": str(error) if error else None,
312
+ }
313
+ )
314
+ except Exception as e:
315
+ logger.warning(
316
+ f"Error in processing task: {question}: {e}"
317
+ )
318
+ self._results.append(
319
+ {
320
+ "question": question,
321
+ "agent_response": None,
322
+ "correct": False,
323
+ "hallucination": False,
324
+ "error": str(e),
325
+ }
326
+ )
327
+
328
+ agent.reset()
329
+
330
+ f.write(json.dumps(self._results[-1], indent=2) + "\n")
331
+ f.flush()
332
+
333
+ total = len(self._results)
334
+ correct = sum(r["correct"] for r in self.results)
335
+ hallucination = sum(r["hallucination"] for r in self.results)
336
+
337
+ return {
338
+ "total": total,
339
+ "correct": correct,
340
+ "hallucination": hallucination,
341
+ "accuracy": correct / total if total else "N/A",
342
+ "hallucination rate": hallucination / total if total else "N/A",
343
+ }
344
+
345
+
346
+ # This code is modified from the
347
+ # evaluators in the original repo
348
+ # https://github.com/ShishirPatil/gorilla
349
+ # Get all the subtrees given a root_node
350
+ def get_all_sub_trees(root_node):
351
+ node_stack = []
352
+ sub_tree_sexp_list = []
353
+ depth = 1
354
+ # text = root_node.text
355
+ node_stack.append([root_node, depth])
356
+ while len(node_stack) != 0:
357
+ cur_node, cur_depth = node_stack.pop()
358
+ if cur_node.child_count > 0:
359
+ sub_tree_sexp_list.append(
360
+ [
361
+ str(cur_node),
362
+ cur_depth,
363
+ cur_node,
364
+ cur_node.children[0].text,
365
+ ]
366
+ )
367
+ else:
368
+ sub_tree_sexp_list.append(
369
+ [str(cur_node), cur_depth, cur_node, None]
370
+ )
371
+ for child_node in cur_node.children:
372
+ if len(child_node.children) != 0:
373
+ depth = cur_depth + 1
374
+ node_stack.append([child_node, depth])
375
+ return sub_tree_sexp_list
376
+
377
+
378
+ # Parse the program into AST trees
379
+ def ast_parse(candidate):
380
+ PY_LANGUAGE = Language(tspython.language())
381
+ parser = Parser(PY_LANGUAGE)
382
+
383
+ candidate_tree = parser.parse(bytes(candidate, "utf8")).root_node
384
+ return candidate_tree
385
+
386
+
387
+ # Get all the arguments in the ast tree
388
+ def get_args(node, dataset_name):
389
+ if node.child_count == 0:
390
+ return []
391
+ args_list = []
392
+ if dataset_name == "huggingface":
393
+ for child in node.children[0].children[0].children[1].children:
394
+ if "=" in child.text.decode():
395
+ args_list.append(child.children[2].text)
396
+ elif (
397
+ child.text.decode() != "("
398
+ and child.text.decode() != ")"
399
+ and child.text.decode() != ","
400
+ ):
401
+ args_list.append(child.text)
402
+ elif dataset_name == "tensorflowhub":
403
+ for child in node.children[0].children[0].children[1].children:
404
+ if (
405
+ 'model=' in child.text.decode()
406
+ or 'model =' in child.text.decode()
407
+ ):
408
+ args_list.append(child.children[2].text)
409
+ elif (
410
+ child.text.decode() != "("
411
+ and child.text.decode() != ")"
412
+ and child.text.decode() != ","
413
+ ):
414
+ args_list.append(child.text)
415
+ elif dataset_name == "torchhub":
416
+ for child in node.children[0].children[0].children[1].children:
417
+ if (
418
+ "repo_or_dir" in child.text.decode()
419
+ or "model" in child.text.decode()
420
+ ):
421
+ args_list.append(child.children[2].text)
422
+ return args_list
423
+
424
+
425
+ # Check if there is an api match
426
+ def ast_check(candidate_subtree_list, base_tree_list, dataset_name):
427
+ for idx, base_tree in enumerate(base_tree_list):
428
+ if base_tree.children[0].children[0].child_count == 0:
429
+ continue
430
+ api_name = base_tree.children[0].children[0].children[0].text
431
+ for candidate_tree in candidate_subtree_list:
432
+ if candidate_tree[3] == api_name:
433
+ break
434
+ # Now we have a sub-tree
435
+ candidate_tree = candidate_tree[2]
436
+ args_list = get_args(base_tree, dataset_name)
437
+ if len(args_list) == 0:
438
+ continue
439
+ ast_match = True
440
+ for arg in args_list:
441
+ if (
442
+ arg.decode().lstrip("'").rstrip("'")
443
+ not in candidate_tree.text.decode()
444
+ ):
445
+ ast_match = False
446
+ break
447
+ if ast_match:
448
+ return idx
449
+ return -1
450
+
451
+
452
+ def evaluate_response(
453
+ response, question_id, dataset_name, api_database, qa_pairs, ast_database
454
+ ):
455
+ try:
456
+ # Index the "api_call" domain
457
+ output = response.split("api_call")
458
+ if len(output) == 1:
459
+ api_call = output[0]
460
+ else:
461
+ # Parse the output
462
+ output = output[1].split("api_provider")[0]
463
+ if ":" not in output:
464
+ start = 0
465
+ else:
466
+ start = output.index(":")
467
+ if ")" not in output:
468
+ end = -2
469
+ else:
470
+ end = output.rindex(")")
471
+ api_call = output[start + 2 : end + 1]
472
+
473
+ try:
474
+ ast_tree = ast_parse(api_call)
475
+ except Exception as parse_error:
476
+ print(f"Error parsing api_call: {api_call}, error: {parse_error}")
477
+ return parse_error, False, False
478
+ # Search for a subtree
479
+ ast_subtree_list = get_all_sub_trees(ast_tree)
480
+ # Check which ast tree is matching
481
+ database_index = ast_check(
482
+ ast_subtree_list, ast_database, dataset_name
483
+ )
484
+ # We cannot index this ast in our database
485
+ if database_index == -1:
486
+ hallucination = True
487
+ correct = False
488
+ # We index our reference api_call
489
+ ref_api_call = api_database[database_index]
490
+ # Check for functionality
491
+ if ref_api_call['domain'] == qa_pairs[question_id - 1]['domain']:
492
+ correct = True
493
+ hallucination = False
494
+ else:
495
+ return None, False, False
496
+ except Exception as e:
497
+ print(f'Error parsing response: {response}, error: {e}')
498
+ return e, False, False
499
+
500
+ return None, correct, hallucination
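A minimal usage sketch of this benchmark is shown below; it assumes a working camel ChatAgent and network access for the downloads, and the paths, system prompt, and subset size are placeholders rather than recommended settings.

# Hedged usage sketch: paths, prompt, and subset size are illustrative only.
from camel.agents import ChatAgent
from camel.benchmarks.apibench import APIBenchBenchmark

benchmark = APIBenchBenchmark(data_dir="datasets/apibench", save_to="apibench_results.jsonl")
benchmark.download()  # fetches the APIBench dataset and the Gorilla question files

agent = ChatAgent("You answer with a single API call in the requested format.")
summary = benchmark.run(agent, dataset_name="torchhub", randomize=False, subset=10)
print(summary["accuracy"], summary["hallucination rate"])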
Paper2Poster/camel/benchmarks/base.py ADDED
@@ -0,0 +1,152 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import logging
16
+ from abc import ABC, abstractmethod
17
+ from pathlib import Path
18
+ from typing import Any, Dict, List, Literal, Optional
19
+
20
+ from camel.agents import ChatAgent
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class BaseBenchmark(ABC):
26
+ r"""Base class for benchmarks.
27
+
28
+ Attributes:
29
+ name (str): Name of the benchmark.
30
+ data_dir (str): Path to the data directory.
31
+ save_to (str): Path to save the results.
32
+ processes (int): Number of processes to use for parallel
33
+ processing. (default: :obj:`1`)
34
+ """
35
+
36
+ def __init__(
37
+ self, name: str, data_dir: str, save_to: str, processes: int = 1
38
+ ):
39
+ r"""Initialize the benchmark.
40
+
41
+ Args:
42
+ name (str): Name of the benchmark.
43
+ data_dir (str): Path to the data directory.
44
+ save_to (str): Path to save the results.
45
+ processes (int): Number of processes to use for parallel
46
+ processing. (default: :obj:`1`)
47
+
48
+ """
49
+ self.name = name
50
+ self.data_dir = Path(data_dir)
51
+ self.processes = processes
52
+ self.save_to = save_to
53
+ if not self.data_dir.exists():
54
+ logger.info(
55
+ f"Data directory {data_dir} does not exist. Creating it."
56
+ )
57
+ self.data_dir.mkdir(parents=True, exist_ok=True)
58
+ if not self.data_dir.is_dir():
59
+ raise NotADirectoryError(
60
+ f"Data directory {data_dir} is not a directory"
61
+ )
62
+ self._data: Dict[str, List[Dict[str, Any]]] = dict()
63
+ self._results: List[Dict[str, Any]] = []
64
+
65
+ @abstractmethod
66
+ def download(self) -> "BaseBenchmark":
67
+ r"""Download the benchmark data.
68
+
69
+ Returns:
70
+ BaseBenchmark: The benchmark instance.
71
+ """
72
+ pass
73
+
74
+ @abstractmethod
75
+ def load(self, force_download: bool = False) -> "BaseBenchmark":
76
+ r"""Load the benchmark data.
77
+
78
+ Args:
79
+ force_download (bool): Whether to force download the data.
80
+
81
+ Returns:
82
+ BaseBenchmark: The benchmark instance.
83
+ """
84
+ pass
85
+
86
+ @property
87
+ def train(self) -> List[Dict[str, Any]]:
88
+ r"""Get the training data.
89
+
90
+ Returns:
91
+ List[Dict[str, Any]]: The training data.
92
+ """
93
+ if not self._data:
94
+ logger.info("Data not loaded. Loading data.")
95
+ self.load()
96
+ return self._data["train"]
97
+
98
+ @property
99
+ def valid(self) -> List[Dict[str, Any]]:
100
+ r"""Get the validation data.
101
+
102
+ Returns:
103
+ List[Dict[str, Any]]: The validation data.
104
+ """
105
+ if not self._data:
106
+ logger.info("Data not loaded. Loading data.")
107
+ self.load()
108
+ return self._data["valid"]
109
+
110
+ @property
111
+ def test(self) -> List[Dict[str, Any]]:
112
+ r"""Get the test data.
113
+
114
+ Returns:
115
+ List[Dict[str, Any]]: The test data.
116
+ """
117
+ if not self._data:
118
+ logger.info("Data not loaded. Loading data.")
119
+ self.load()
120
+ return self._data["test"]
121
+
122
+ @abstractmethod
123
+ def run(
124
+ self,
125
+ agent: ChatAgent,
126
+ on: Literal["train", "valid", "test"],
127
+ randomize: bool = False,
128
+ subset: Optional[int] = None,
129
+ *args,
130
+ **kwargs,
131
+ ) -> "BaseBenchmark":
132
+ r"""Run the benchmark.
133
+
134
+ Args:
135
+ agent (ChatAgent): The chat agent.
136
+ on (str): The data split to run the benchmark on.
137
+ randomize (bool): Whether to randomize the data.
138
+ subset (int): The subset of the data to run the benchmark on.
139
+
140
+ Returns:
141
+ BaseBenchmark: The benchmark instance.
142
+ """
143
+ pass
144
+
145
+ @property
146
+ def results(self) -> List[Dict[str, Any]]:
147
+ r"""Get the results.
148
+
149
+ Returns:
150
+ List[Dict[str, Any]]: The results.
151
+ """
152
+ return self._results
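As a rough sketch, a new benchmark would subclass BaseBenchmark and implement the three abstract methods; the dataset fields and class name below are hypothetical, not part of the library.

# Illustrative skeleton only; the "question"/"answer" fields are made up.
from typing import Literal, Optional

from camel.agents import ChatAgent
from camel.benchmarks.base import BaseBenchmark
from camel.messages import BaseMessage


class ToyBenchmark(BaseBenchmark):
    def download(self) -> "BaseBenchmark":
        return self  # nothing to fetch for this toy example

    def load(self, force_download: bool = False) -> "BaseBenchmark":
        self._data = {
            "train": [],
            "valid": [],
            "test": [{"question": "What is 2 + 2?", "answer": "4"}],
        }
        return self

    def run(
        self,
        agent: ChatAgent,
        on: Literal["train", "valid", "test"],
        randomize: bool = False,
        subset: Optional[int] = None,
        *args,
        **kwargs,
    ) -> "BaseBenchmark":
        for item in self._data[on][:subset]:
            msg = BaseMessage.make_user_message(role_name="User", content=item["question"])
            reply = agent.step(msg).msgs[0].content
            self._results.append({"expected": item["answer"], "got": reply})
        return self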
Paper2Poster/camel/benchmarks/gaia.py ADDED
@@ -0,0 +1,478 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import json
16
+ import logging
17
+ import os
18
+ import random
19
+ import re
20
+ import string
21
+ import uuid
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Literal, Optional, Protocol, Union
24
+
25
+ from tqdm import tqdm
26
+
27
+ from camel.agents import ChatAgent
28
+ from camel.benchmarks.base import BaseBenchmark
29
+ from camel.messages import BaseMessage
30
+ from camel.retrievers.auto_retriever import AutoRetriever
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
+ class RetrieverProtocol(Protocol):
36
+ r"""Protocol for the retriever class. Any retriever class implementing
37
+ this protocol can be used in the benchmark class.
38
+ """
39
+
40
+ def retrieve(
41
+ self, query: str, contents: List[str], **kwargs: Dict[str, Any]
42
+ ) -> Dict[str, Any]:
43
+ r"""Retrieve the relevant content for the query.
44
+
45
+ Args:
46
+ query (str): The query to retrieve the content for.
47
+ contents (List[str]): The list of contents to search in.
48
+ **kwargs (Dict[str, Any]): Additional keyword arguments.
49
+
50
+ Returns:
51
+ Dict[str, Any]: The relevant content for the query.
52
+ """
53
+ ...
54
+
55
+ def reset(self, **kwargs) -> bool:
56
+ r"""Reset the retriever.
57
+ Some benchmarks may require resetting the retriever
58
+ after each query.
59
+
60
+ Args:
61
+ **kwargs: Additional keyword arguments.
62
+
63
+ Returns:
64
+ bool: True if the reset was successful, False otherwise.
65
+ """
66
+ ...
67
+
68
+
69
+ class DefaultGAIARetriever(AutoRetriever):
70
+ r"""Default retriever for the GAIA benchmark.
71
+ This retriever uses AutoRetriever in camel to retrieve the content based on
72
+ the query.
73
+ """
74
+
75
+ def retrieve(
76
+ self, query: str, contents: List[str], **kwargs: Any
77
+ ) -> Dict[str, Any]:
78
+ r"""Retrieve the content based on the query.
79
+
80
+ Args:
81
+ query (str): The query to search for.
82
+ contents (List[str]): The list of contents to search from.
83
+ **kwargs (Any): The keyword arguments to pass to the
84
+ retriever.
85
+
86
+ Returns:
87
+ Dict[str, Any]: The retrieved content.
88
+ """
89
+ return self.run_vector_retriever(query, contents, **kwargs) # type: ignore[arg-type]
90
+
91
+ def reset(self, **kwargs: Any) -> bool:
92
+ r"""Reset the retriever.
93
+
94
+ Args:
95
+ **kwargs (Any): The keyword arguments to pass to the
96
+ retriever.
97
+
98
+ Returns:
99
+ bool: Whether the reset was successful.
100
+ """
101
+ path = Path(self.vector_storage_local_path or os.getcwd())
102
+ task_id = str(kwargs.get("task_id", uuid.uuid4()))
103
+ retriever_dir = path / task_id
104
+ if not retriever_dir.exists():
105
+ try:
106
+ retriever_dir.mkdir(parents=True)
107
+ except Exception as e:
108
+ logger.error(
109
+ "Error in creating directory: " + f"{retriever_dir}: {e!s}"
110
+ )
111
+ return False
112
+ self.vector_storage_local_path = str(retriever_dir)
113
+ return True
114
+
115
+
116
+ class GAIABenchmark(BaseBenchmark):
117
+ r"""GAIA Benchmark adapted from `"GAIA: a benchmark for General AI
118
+ Assistants"
119
+ <https://huggingface.co/datasets/gaia-benchmark/GAIA>`_.
120
+
121
+ Args:
122
+ data_dir (str): The directory to save the data.
123
+ save_to (str): The file to save the results.
124
+ retriever (Optional[RetrieverProtocol]): The retriever to use.
125
+ (default: :obj:`None`)
126
+ processes (int, optional): The number of processes to use.
127
+ (default: :obj:`1`)
128
+ """
129
+
130
+ def __init__(
131
+ self,
132
+ data_dir: str,
133
+ save_to: str,
134
+ retriever: Optional[RetrieverProtocol] = None,
135
+ processes: int = 1,
136
+ ):
137
+ r"""Initialize the GAIA benchmark.
138
+
139
+ Args:
140
+ data_dir (str): The directory to save the data.
141
+ save_to (str): The file to save the results.
142
+ retriever (Optional[RetrieverProtocol], optional): The retriever to
143
+ use. (default: :obj:`None`)
144
+ processes (int, optional): The number of processes to use for
145
+ parallel processing. (default: :obj:`1`)
146
+ """
147
+ super().__init__("gaia", data_dir, save_to, processes)
148
+ self.retriever = retriever or DefaultGAIARetriever()
149
+
150
+ def download(self):
151
+ r"""Download the GAIA dataset."""
152
+ from huggingface_hub import snapshot_download
153
+
154
+ snapshot_download(
155
+ repo_id="gaia-benchmark/GAIA",
156
+ repo_type="dataset",
157
+ local_dir=self.data_dir,
158
+ local_dir_use_symlinks=True,
159
+ )
160
+
161
+ def load(self, force_download=False):
162
+ r"""Load the GAIA dataset.
163
+
164
+ Args:
165
+ force_download (bool, optional): Whether to
166
+ force download the data.
167
+ """
168
+ if force_download:
169
+ logger.info("Force downloading data.")
170
+ self.download()
171
+
172
+ # Define validation and test directories
173
+ valid_dir = self.data_dir / "2023/validation"
174
+ test_dir = self.data_dir / "2023/test"
175
+
176
+ # Check if directories exist; if not, download the data
177
+ if not valid_dir.is_dir() or not test_dir.is_dir():
178
+ logger.info("Data not found. Downloading data.")
179
+ self.download()
180
+
181
+ # Load metadata for both validation and test datasets
182
+ for path, label in zip([valid_dir, test_dir], ["valid", "test"]):
183
+ self._data[label] = []
184
+ with open(path / "metadata.jsonl", "r") as f:
185
+ lines = f.readlines()
186
+ for line in lines:
187
+ data = json.loads(line)
188
+ if data["task_id"] == "0-0-0-0-0":
189
+ continue
190
+ if data["file_name"]:
191
+ data["file_name"] = path / data["file_name"]
192
+ self._data[label].append(data)
193
+ return self
194
+
195
+ @property
196
+ def train(self):
197
+ r"""Get the training set."""
198
+ raise NotImplementedError("GAIA does not have a training set.")
199
+
200
+ def run( # type: ignore[override]
201
+ self,
202
+ agent: ChatAgent,
203
+ on: Literal["train", "valid", "test"],
204
+ level: Union[int, List[int], Literal["all"]],
205
+ randomize: bool = False,
206
+ subset: Optional[int] = None,
207
+ ) -> Dict[str, Any]:
208
+ r"""Run the benchmark.
209
+
210
+ Args:
211
+ agent (ChatAgent): The agent to run the benchmark.
212
+ on (Literal["valid", "test"]): The set to run the benchmark.
213
+ level (Union[int, List[int], Literal["all"]]): The level to run
214
+ the benchmark.
215
+ randomize (bool, optional): Whether to randomize the data.
216
+ (default: :obj:`False`)
217
+ subset (Optional[int], optional): The subset of data to run.
218
+ (default: :obj:`None`)
219
+
220
+ Returns:
221
+ Dict[str, Any]: The results of the benchmark.
222
+ """
223
+ # Validate inputs
224
+ if on not in ["valid", "test"]:
225
+ raise ValueError(
226
+ f"Invalid value for `on`: {on}, expected 'valid' or 'test'."
227
+ )
228
+
229
+ levels = (
230
+ [1, 2, 3]
231
+ if level == "all"
232
+ else [level]
233
+ if isinstance(level, int)
234
+ else level
235
+ )
236
+ if not all(
237
+ isinstance(level, int) and level in [1, 2, 3] for level in levels
238
+ ):
239
+ raise ValueError(
240
+ f"Invalid value for `level`: {level}, expected 1, 2, 3 "
241
+ "or 'all'."
242
+ )
243
+
244
+ logger.info(f"Running benchmark on {on} set at levels {levels}.")
245
+ datas = [data for data in self._data[on] if data["Level"] in levels]
246
+
247
+ # Shuffle and subset data if necessary
248
+ if randomize:
249
+ random.shuffle(datas)
250
+ if subset:
251
+ datas = datas[:subset]
252
+
253
+ logger.info(f"Number of tasks: {len(datas)}")
254
+
255
+ # Initialize results storage
256
+ self._results = []
257
+
258
+ # Process tasks
259
+ with open(self.save_to, "w") as f:
260
+ for task in tqdm(datas, desc="Running"):
261
+ if not self._prepare_task(task):
262
+ continue
263
+
264
+ try:
265
+ result = agent.step(self._create_user_message(task))
266
+ self._process_result(agent, task, result, f)
267
+ except Exception as e:
268
+ self._handle_error(task, e, f)
269
+ finally:
270
+ agent.reset()
271
+
272
+ return self._generate_summary()
273
+
274
+ def _prepare_task(self, task: Dict[str, Any]) -> bool:
275
+ r"""Prepare the task by validating and enriching its data."""
276
+ if task["file_name"]:
277
+ file_path = Path(task["file_name"])
278
+ if not file_path.exists():
279
+ logger.info(
280
+ f"Skipping task because file not found: {file_path}"
281
+ )
282
+ return False
283
+ if file_path.suffix in [".pdf", ".docx", ".doc", ".txt"]:
284
+ if not self.retriever.reset(task_id=task["task_id"]):
285
+ return False
286
+ retrieved_info = self.retriever.retrieve(
287
+ query=task["Question"], contents=[task["file_name"]]
288
+ )
289
+ retrieved_content = [
290
+ item["text"]
291
+ for item in retrieved_info.get("Retrieved Context", [])
292
+ ]
293
+ if retrieved_content:
294
+ task["Question"] += "\n" + "\n".join(retrieved_content)
295
+ else:
296
+ logger.info(
297
+ f"Skipping task due to unsupported file "
298
+ f"format: {file_path.suffix}"
299
+ )
300
+ return False
301
+ return True
302
+
303
+ def _create_user_message(self, task: Dict[str, Any]) -> BaseMessage:
304
+ r"""Create a user message from a task."""
305
+ return BaseMessage.make_user_message(
306
+ role_name="User",
307
+ content=task["Question"],
308
+ )
309
+
310
+ def _process_result(
311
+ self,
312
+ agent: ChatAgent,
313
+ task: Dict[str, Any],
314
+ result: Any,
315
+ file_obj: Any,
316
+ ) -> None:
317
+ r"""Process and store the result of a task."""
318
+ model_answer = self.get_final_answer(result.msgs[0].content)
319
+ final_answer = task["Final answer"]
320
+ score = self.question_scorer(model_answer, final_answer)
321
+ tool_calls = result.info.get("tool_calls", [])
322
+
323
+ result_data = {
324
+ "task_id": task["task_id"],
325
+ "question": task["Question"],
326
+ "level": task["Level"],
327
+ "model_answer": model_answer,
328
+ "ground_truth": final_answer,
329
+ "tool_calls": [tool.model_dump() for tool in tool_calls],
330
+ "error": None,
331
+ "score": int(score),
332
+ "history": agent.memory.get_context(),
333
+ }
334
+ self._results.append(result_data)
335
+ file_obj.write(json.dumps(result_data, indent=2) + "\n")
336
+ file_obj.flush()
337
+
338
+ def _handle_error(
339
+ self, task: Dict[str, Any], error: Exception, file_obj: Any
340
+ ) -> None:
341
+ r"""Handle errors encountered during task processing."""
342
+ logger.warning(f"Error processing task {task['task_id']}: {error}")
343
+ error_data = {
344
+ "task_id": task["task_id"],
345
+ "question": task["Question"],
346
+ "level": task["Level"],
347
+ "model_answer": "ERROR",
348
+ "ground_truth": task["Final answer"],
349
+ "tool_calls": [],
350
+ "error": str(error),
351
+ "score": 0,
352
+ }
353
+ self._results.append(error_data)
354
+ file_obj.write(json.dumps(error_data, indent=2) + "\n")
355
+ file_obj.flush()
356
+
357
+ def _generate_summary(self) -> Dict[str, Any]:
358
+ r"""Generate and return a summary of the benchmark results."""
359
+ return {
360
+ "total": len(self._results),
361
+ "correct": sum(result["score"] for result in self._results),
362
+ "results": self._results,
363
+ }
364
+
365
+ def question_scorer(self, model_answer: str, ground_truth: str) -> bool:
366
+ r"""Scorer for the GAIA benchmark.
367
+ https://huggingface.co/spaces/gaia-benchmark/leaderboard/blob/main/
368
+ scorer.py
369
+
370
+ Args:
371
+ model_answer (str): The model answer.
372
+ ground_truth (str): The ground truth answer.
373
+
374
+ Returns:
375
+ bool: The score of the model
376
+ """
377
+
378
+ def is_float(element: Any) -> bool:
379
+ try:
380
+ float(element)
381
+ return True
382
+ except ValueError:
383
+ return False
384
+
385
+ if is_float(ground_truth):
386
+ logger.info(f"Evaluating {model_answer} as a number.")
387
+ normalized_answer = self.normalize_number_str(model_answer)
388
+ return normalized_answer == float(ground_truth)
389
+
390
+ elif any(char in ground_truth for char in [",", ";"]):
391
+ logger.info(
392
+ f"Evaluating {model_answer} as a comma separated list."
393
+ )
394
+ gt_elems = self.split_string(ground_truth)
395
+ ma_elems = self.split_string(model_answer)
396
+
397
+ if len(gt_elems) != len(ma_elems):
398
+ logger.warning(
399
+ "Answer lists have different lengths, returning False.",
400
+ UserWarning,
401
+ )
402
+ return False
403
+
404
+ comparisons = []
405
+ for ma_elem, gt_elem in zip(ma_elems, gt_elems):
406
+ if is_float(gt_elem):
407
+ normalized_ma_elem = self.normalize_number_str(ma_elem)
408
+ comparisons.append(normalized_ma_elem == float(gt_elem))
409
+ else:
410
+ ma_elem = self.normalize_str(ma_elem, remove_punct=False)
411
+ gt_elem = self.normalize_str(gt_elem, remove_punct=False)
412
+ comparisons.append(ma_elem == gt_elem)
413
+ return all(comparisons)
414
+ else:
415
+ logger.info(f"Evaluating {model_answer} as a string.")
416
+ ma_elem = self.normalize_str(model_answer)
417
+ gt_elem = self.normalize_str(ground_truth)
418
+ return ma_elem == gt_elem
419
+
420
+ def normalize_number_str(self, number_str: str) -> float:
421
+ for char in ["$", "%", ","]:
422
+ number_str = number_str.replace(char, "")
423
+ try:
424
+ return float(number_str)
425
+ except ValueError:
426
+ logger.error(
427
+ f"String {number_str} cannot be normalized to number str."
428
+ )
429
+ return float("inf")
430
+
431
+ def split_string(
432
+ self, s: str, char_list: Optional[List[str]] = None
433
+ ) -> list[str]:
434
+ r"""Split a string based on a list of characters.
435
+
436
+ Args:
437
+ s (str): The string to split.
438
+ char_list (Optional[List[str]], optional): The list of
+ characters to split on.
440
+ (default: :obj:`None`)
441
+ """
442
+ if char_list is None:
443
+ char_list = [",", ";"]
444
+ pattern = f"[{''.join(char_list)}]"
445
+ return re.split(pattern, s)
446
+
447
+ def normalize_str(self, input_str, remove_punct=True) -> str:
448
+ r"""Normalize a string.
449
+
450
+ Args:
451
+ input_str: The input string to normalize.
452
+ remove_punct: Whether to remove punctuation.
453
+
454
+ Returns:
455
+ str: The normalized string.
456
+ """
457
+ no_spaces = re.sub(r"\s", "", input_str)
458
+ if remove_punct:
459
+ translator = str.maketrans("", "", string.punctuation)
460
+ return no_spaces.lower().translate(translator)
461
+ else:
462
+ return no_spaces.lower()
463
+
464
+ def get_final_answer(self, content: str) -> str:
465
+ r"""Get the final answer from the content.
466
+
467
+ Args:
468
+ content (str): The content to extract the final answer from.
469
+
470
+ Returns:
471
+ str: The final answer.
472
+ """
473
+ final_answer_index = content.find("FINAL ANSWER")
474
+ if final_answer_index == -1:
475
+ return "FINAL ANSWER not found"
476
+ start_index = final_answer_index + len("FINAL ANSWER: ")
477
+ final_answer_content = content[start_index:].strip()
478
+ return final_answer_content
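The scorer's three normalization paths can be illustrated with hand-made values (not real GAIA answers); this sketch assumes the default retriever can be constructed without further configuration.

# Hedged sketch of question_scorer behaviour on made-up answers.
from camel.benchmarks.gaia import GAIABenchmark

bench = GAIABenchmark(data_dir="datasets/gaia", save_to="gaia_results.jsonl")

bench.question_scorer("$3,120.50", "3120.5")    # numeric: "$", "%", "," stripped, compared as floats -> True
bench.question_scorer("cat; dog", "cat,dog")    # list: split on "," or ";", compared element-wise -> True
bench.question_scorer("New  York", "new york")  # string: whitespace/punctuation removed, lowercased -> True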
Paper2Poster/camel/benchmarks/nexus.py ADDED
@@ -0,0 +1,518 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import ast
16
+ import json
17
+ import logging
18
+ import os
19
+ import random
20
+ import textwrap
21
+ from dataclasses import dataclass
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Literal, Optional, Tuple, Union
24
+
25
+ import pandas as pd
26
+ from datasets import load_dataset
27
+ from tqdm import tqdm
28
+
29
+ from camel.agents import ChatAgent
30
+ from camel.benchmarks.base import BaseBenchmark
31
+ from camel.messages import BaseMessage
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ # Define the data class
37
+ @dataclass
38
+ class NexusSample:
39
+ r"""Nexus benchmark dataset sample."""
40
+
41
+ input: str
42
+ output: str
43
+
44
+
45
+ @dataclass
46
+ class NexusTool:
47
+ r"""Nexus benchmark tool"""
48
+
49
+ function_calls: str
50
+ descriptions: str
51
+
52
+
53
+ dataset_mapping = {
54
+ "NVDLibrary": "Nexusflow/NVDLibraryBenchmark",
55
+ "VirusTotal": "Nexusflow/VirusTotalBenchmark",
56
+ "PlacesAPI": "Nexusflow/PlacesAPIBenchmark",
57
+ "ClimateAPI": "Nexusflow/ClimateAPIBenchmark",
58
+ "OTX": "Nexusflow/OTXAPIBenchmark",
59
+ "VirusTotal-NestedCalls": "Nexusflow/vt_multiapi",
60
+ "VirusTotal-ParallelCalls": "Nexusflow/vt_multiapi",
61
+ "NVDLibrary-NestedCalls": "Nexusflow/CVECPEAPIBenchmark",
62
+ }
63
+
64
+ TOOL_CALLING_PROMPT = """
65
+ You are given multiple functions and a user query.
66
+
67
+ Please proceed with generating a function call for the function \
68
+ with the proper arguments that best answers the given prompt.
69
+
70
+ Respond with nothing but the function call ONLY, such that I can \
71
+ directly execute your function call without any post processing \
72
+ necessary from my end. Do not use variables.
73
+ If there are more than two function calls, separate them with a semicolon (;).
74
+
75
+ {tools}
76
+
77
+ Question: {input}
78
+ """
79
+
80
+
81
+ class NexusBenchmark(BaseBenchmark):
82
+ r"""Nexus Function Calling Benchmark adapted from `NexusRaven V2
83
+ Function Calling Benchmark`
84
+ <https://huggingface.co/collections/Nexusflow/nexusraven-v2-function-calling-benchmark-657a597fb84dbe7a09ebfc3e>.
85
+
86
+ Args:
87
+ data_dir (str): The directory to save the data.
88
+ save_to (str): The file to save the results.
89
+ processes (int, optional): The number of processes to use.
90
+ (default: :obj:`1`)
91
+ """
92
+
93
+ def __init__(
94
+ self,
95
+ data_dir: str,
96
+ save_to: str,
97
+ processes: int = 1,
98
+ ):
99
+ r"""Initialize the Nexus Function Calling benchmark.
100
+
101
+ Args:
102
+ data_dir (str): The directory to save the data.
103
+ save_to (str): The file to save the results.
104
+ processes (int, optional): The number of processes to use for
105
+ parallel processing. (default: :obj:`1`)
106
+ """
107
+ super().__init__("nexus", data_dir, save_to, processes)
108
+ self._data: List[NexusSample] = [] # type: ignore[assignment]
109
+
110
+ def download(self):
111
+ r"""Download the Nexus Functional Calling Benchmark dataset."""
112
+ from huggingface_hub import snapshot_download
113
+
114
+ for dataset_name, repo_id in dataset_mapping.items():
115
+ local_dir = self.data_dir / dataset_name
116
+ snapshot_download(
117
+ repo_id=repo_id,
118
+ repo_type="dataset",
119
+ local_dir=local_dir,
120
+ local_dir_use_symlinks=True,
121
+ )
122
+
123
+ def load(self, dataset_name: str, force_download: bool = False): # type: ignore[override]
124
+ r"""Load the Nexus Benchmark dataset.
125
+
126
+ Args:
127
+ dataset_name (str): Name of the specific dataset to be loaded.
128
+ force_download (bool): Whether to force download the data.
129
+ """
130
+
131
+ def _load_csv_data(dataset_dir: Path) -> List:
132
+ r"""Load datasets from CSV files."""
133
+ dataset = []
134
+ for file_name in os.listdir(dataset_dir):
135
+ file_path = dataset_dir / file_name
136
+ if file_name.endswith(".csv"):
137
+ data = pd.read_csv(file_path)
138
+ for _, sample in data.iterrows():
139
+ dataset.append(
140
+ NexusSample(
141
+ sample["Input"], "".join(sample["Output"])
142
+ )
143
+ )
144
+ continue
145
+
146
+ logger.warning(f"Skipping unsupported file: {file_name}")
147
+ return dataset
148
+
149
+ def _load_parquet_data(data_dir: Path, dataset_name: str) -> List:
150
+ r"""Load datasets from Parquet files."""
151
+ dataset = []
152
+ if not data_dir.exists():
153
+ raise FileNotFoundError(
154
+ f"Data directory '{data_dir}' does not exist."
155
+ )
156
+
157
+ for file_name in os.listdir(data_dir):
158
+ file_path = data_dir / file_name
159
+ if file_name.endswith(".parquet"):
160
+ data = pd.read_parquet(file_path)
161
+ dataset.extend(_process_parquet_data(data, dataset_name))
162
+ continue
163
+
164
+ logger.warning(f"Skipping unsupported file: {file_name}")
165
+
166
+ return dataset
167
+
168
+ def _process_parquet_data(
169
+ data: pd.DataFrame, dataset_name: str
170
+ ) -> List:
171
+ r"""Process data from Parquet files based on dataset name."""
172
+ dataset: List = []
173
+ dataset_handlers = {
174
+ "NVDLibrary": _process_nvdlibrary,
175
+ "VirusTotal": _process_simple,
176
+ "PlacesAPI": _process_simple,
177
+ "ClimateAPI": _process_simple,
178
+ "OTX": _process_simple,
179
+ "VirusTotal-NestedCalls": _process_nested_calls,
180
+ "VirusTotal-ParallelCalls": _process_parallel_calls,
181
+ }
182
+
183
+ if dataset_name not in dataset_handlers:
184
+ logger.warning(
185
+ f"No specific handler for dataset: {dataset_name}"
186
+ )
187
+ return dataset
188
+
189
+ handler = dataset_handlers[dataset_name]
190
+ for _, sample in data.iterrows():
191
+ processed_sample = handler(sample)
192
+ if processed_sample:
193
+ dataset.append(processed_sample)
194
+ return dataset
195
+
196
+ def _process_nvdlibrary(sample) -> NexusSample:
197
+ r"""Process samples for the NVDLibrary dataset."""
198
+ return NexusSample(
199
+ sample["Input"], sample["Output"].replace("r = nvdlib.", "")
200
+ )
201
+
202
+ def _process_simple(sample) -> NexusSample:
203
+ r"""Process samples for simple datasets (e.g., VirusTotal)."""
204
+ return NexusSample(sample["Input"], sample["Output"])
205
+
206
+ def _process_nested_calls(sample) -> Union[NexusSample, None]:
207
+ r"""Process samples for VirusTotal-NestedCalls dataset."""
208
+ if len(sample["fncall"]) == 1:
209
+ return NexusSample(
210
+ sample["generated_question"], "".join(sample["fncall"])
211
+ )
212
+ return None
213
+
214
+ def _process_parallel_calls(sample) -> Union[NexusSample, None]:
215
+ r"""Process samples for VirusTotal-ParallelCalls dataset."""
216
+ if len(sample["fncall"]) > 1:
217
+ return NexusSample(
218
+ sample["generated_question"], "; ".join(sample["fncall"])
219
+ )
220
+ return None
221
+
222
+ if force_download:
223
+ logger.info("Force downloading data.")
224
+ self.download()
225
+
226
+ # Validate dataset name
227
+ if dataset_name not in dataset_mapping:
228
+ available_datasets = list(dataset_mapping.keys())
229
+ raise ValueError(
230
+ f"Dataset '{dataset_name}' is not recognized. "
231
+ f"Available datasets: {available_datasets}"
232
+ )
233
+
234
+ # Get the dataset directory
235
+ dataset_dir = self.data_dir / dataset_name
236
+ if not dataset_dir.exists():
237
+ raise FileNotFoundError(
238
+ f"The dataset directory for '{dataset_name}' \
239
+ does not exist at {dataset_dir}. "
240
+ "Please download it first."
241
+ )
242
+
243
+ # Load the dataset
244
+ if dataset_name == "NVDLibrary-NestedCalls":
245
+ self._data = _load_csv_data(dataset_dir)
246
+ else:
247
+ self._data = _load_parquet_data(dataset_dir / "data", dataset_name)
248
+
249
+ @property
250
+ def train(self):
251
+ r"""Get the training set."""
252
+ raise NotImplementedError(
253
+ "Nexus Functional Calling has only a single 'train' set."
254
+ )
255
+
256
+ def run( # type: ignore[override, return]
257
+ self,
258
+ agent: ChatAgent,
259
+ task: Literal[
260
+ "NVDLibrary",
261
+ "VirusTotal",
262
+ "OTX",
263
+ "PlacesAPI",
264
+ "ClimateAPI",
265
+ "VirusTotal-ParallelCalls",
266
+ "VirusTotal-NestedCalls",
267
+ "NVDLibrary-NestedCalls",
268
+ ],
269
+ randomize: bool = False,
270
+ subset: Optional[int] = None,
271
+ ) -> Dict[str, Any]:
272
+ r"""Run the benchmark.
273
+
274
+ Args:
275
+ agent (ChatAgent): The agent to run the benchmark.
276
+ task (Literal["NVDLibrary", "VirusTotal", "OTX",
277
+ "PlacesAPI", "ClimateAPI", "VirusTotal-ParallelCalls",
278
+ "VirusTotal-NestedCalls",
279
+ "NVDLibrary-NestedCalls"]): The task to run the benchmark.
280
+ randomize (bool, optional): Whether to randomize the data.
281
+ (default: :obj:`False`)
282
+ subset (Optional[int], optional): The subset of data to run.
283
+ (default: :obj:`None`)
284
+
285
+ Returns:
286
+ Dict[str, Any]: The results of the benchmark.
287
+ """
288
+
289
+ if task not in dataset_mapping:
290
+ raise ValueError(f"Invalid value for dataset: {task}.")
291
+
292
+ logger.info(f"Running Nexus Function Calling benchmark on {task}.")
293
+ self.load(task)
294
+ datas = self._data
295
+
296
+ # Shuffle and subset data if necessary
297
+ if randomize:
298
+ random.shuffle(datas)
299
+ if subset:
300
+ datas = datas[:subset]
301
+
302
+ logger.info(f"Number of tasks: {len(datas)}")
303
+
304
+ # Initialize results storage
305
+ self._results = []
306
+
307
+ # Process samples
308
+ tools = construct_tool_descriptions(task)
309
+ with open(self.save_to, "w") as f:
310
+ for sample in tqdm(datas, desc="Running"):
311
+ prompt = construct_prompt(input=sample.input, tools=tools)
312
+ msg = BaseMessage.make_user_message(
313
+ role_name="User", content=prompt
314
+ )
315
+ ground_truth_call = sample.output
316
+ try:
317
+ # Generate response
318
+ response = agent.step(msg)
319
+ agent_call = response.msgs[0].content
320
+
321
+ # Evaluate response
322
+ if agent_call:
323
+ result = compare_function_calls(
324
+ agent_call=agent_call,
325
+ ground_truth_call=ground_truth_call,
326
+ )
327
+ self._results.append(
328
+ {
329
+ "input": sample.input,
330
+ "agent_call": agent_call,
331
+ "ground_truth_call": ground_truth_call,
332
+ "result": result,
333
+ "error": None,
334
+ }
335
+ )
336
+ except Exception as e:
337
+ logger.warning(f"Error in processing task: {sample.input}")
338
+ self._results.append(
339
+ {
340
+ "input": sample.input,
341
+ "agent_call": None,
342
+ "ground_truth_call": ground_truth_call,
343
+ "result": 0,
344
+ "error": str(e),
345
+ }
346
+ )
347
+
348
+ agent.reset()
349
+
350
+ f.write(json.dumps(self._results[-1], indent=2) + "\n")
351
+ f.flush()
352
+
353
+ total = len(self._results)
354
+ correct = sum(r["result"] for r in self._results)
355
+
356
+ return {
357
+ "total": total,
358
+ "correct": correct,
359
+ "accuracy": correct / total,
360
+ }
361
+
362
+
363
+ # Utility functions
364
+ def construct_tool_descriptions(dataset_name: str) -> str:
365
+ r"""Construct tool descriptions from function definitions and
366
+ descriptions."""
367
+ tool_dataset_mapping = {
368
+ "NVDLibrary": "CVECPE",
369
+ "VirusTotal": "VirusTotal",
370
+ "PlacesAPI": "Places",
371
+ "ClimateAPI": "Climate",
372
+ "OTX": "OTX",
373
+ "VirusTotal-NestedCalls": "VT_Multi (Nested)",
374
+ "VirusTotal-ParallelCalls": "VT_Multi (Parallel)",
375
+ "NVDLibrary-NestedCalls": "CVECPE_Multi (Nested)",
376
+ }
377
+
378
+ if dataset_name not in tool_dataset_mapping:
379
+ raise ValueError(
380
+ f"Dataset '{dataset_name}' is not recognized. "
381
+ f"Available datasets: {list(dataset_mapping.keys())}"
382
+ )
383
+
384
+ # Load the dataset based on the dataset name
385
+ dataset = load_dataset(
386
+ "Nexusflow/Function_Call_Definitions",
387
+ name=tool_dataset_mapping[dataset_name],
388
+ )["train"]
389
+
390
+ # Construct tool descriptions
391
+ tools = [
392
+ NexusTool(tool["function_calls"], tool["descriptions"])
393
+ for tool in dataset
394
+ ]
395
+
396
+ # Generate the tool prompt
397
+ tool_prompt = "".join(
398
+ f"Function:\ndef {tool.function_calls}:\n"
399
+ + "\"\"\"\n"
400
+ + f"{tool.descriptions}\n"
401
+ + "\"\"\"\n"
402
+ for tool in tools
403
+ )
404
+
405
+ return tool_prompt
406
+
407
+
408
+ def construct_prompt(input: str, tools: str) -> str:
409
+ r"Construct prompt from tools and input."
410
+ return TOOL_CALLING_PROMPT.format(tools=tools, input=input)
411
+
412
+
413
+ # Functions for function call evaluation
414
+ def parse_function_call(
415
+ call: str,
416
+ ) -> Tuple[Optional[str], Optional[List[Any]], Optional[Dict[str, Any]]]:
417
+ r"""Parse a function call string to extract the function name,
418
+ positional arguments, and keyword arguments, including
419
+ nested function calls.
420
+
421
+ Args:
422
+ call (str): A string in the format `func(arg1, arg2, kwarg=value)`.
423
+
424
+ Returns:
425
+ tuple: (function_name (str), positional_args (list),
426
+ keyword_args (dict)) or (None, None, None).
427
+ """
428
+
429
+ def preprocess_input(call: str) -> str:
430
+ r"""Remove formatting like code blocks and whitespace."""
431
+ if call.strip().startswith("```python"):
432
+ call = call.strip().removeprefix("```python").removesuffix("```")
433
+ return textwrap.dedent(call).strip()
434
+
435
+ def evaluate_arg(arg):
436
+ r"""Recursively evaluate arguments, including nested calls."""
437
+ if isinstance(arg, ast.Call):
438
+ # Recursively parse nested calls
439
+ func_name, args, kwargs = parse_function_call(ast.unparse(arg))
440
+ return func_name, args, kwargs
441
+ elif isinstance(
442
+ arg, ast.Constant
443
+ ): # Handle literals like numbers, strings, etc.
444
+ return arg.value
445
+ elif isinstance(arg, ast.List): # Handle list literals
446
+ return [evaluate_arg(el) for el in arg.elts]
447
+ elif isinstance(arg, ast.Dict): # Handle dictionary literals
448
+ return {
449
+ evaluate_arg(k): evaluate_arg(v)
450
+ for k, v in zip(arg.keys, arg.values)
451
+ }
452
+ elif isinstance(arg, ast.Tuple): # Handle tuple literals
453
+ return tuple(evaluate_arg(el) for el in arg.elts)
454
+ else:
455
+ return ast.literal_eval(arg) # Safely evaluate other types
456
+
457
+ call = preprocess_input(call)
458
+ parsed_calls = []
459
+
460
+ try:
461
+ # Parse the string into an AST
462
+ parsed_calls = call.split(";")
463
+ for single_call in parsed_calls:
464
+ tree = ast.parse(single_call, mode='eval')
465
+
466
+ # Ensure it's a function call
467
+ if isinstance(tree.body, ast.Call):
468
+ # Extract function name
469
+ if isinstance(
470
+ tree.body.func, ast.Name
471
+ ): # Simple function call
472
+ func_name = tree.body.func.id
473
+ elif isinstance(
474
+ tree.body.func, ast.Attribute
475
+ ): # Attribute function call
476
+ func_name = (
477
+ f"{tree.body.func.value.id}.{tree.body.func.attr}" # type: ignore[attr-defined]
478
+ )
479
+ else:
480
+ raise ValueError(f"Unsupported function call: {call}")
481
+
482
+ # Extract positional arguments
483
+ args = [evaluate_arg(arg) for arg in tree.body.args]
484
+
485
+ # Extract keyword arguments
486
+ kwargs: Dict[str, Any] = {
487
+ kw.arg: evaluate_arg(kw.value)
488
+ for kw in tree.body.keywords
489
+ if kw.arg is not None
490
+ }
491
+ logger.info("Valid call.")
492
+ return func_name, args, kwargs
493
+ else:
494
+ raise ValueError(f"Not a valid function call: {call}")
495
+ except Exception as e:
496
+ logger.info(f"Error parsing call: {call}, {e}")
497
+ return None, None, None
498
+
499
+
500
+ def compare_function_calls(agent_call: str, ground_truth_call: str) -> bool:
501
+ r"""Compare the function name and arguments of
502
+ agent_call and ground_truth_call.
503
+ Args:
504
+ agent_call (str): Function call by agent.
505
+ ground_truth_call (str): Ground truth function call.
506
+
507
+ Returns:
508
+ - `True` if the function names and arguments match.
509
+ - `False` otherwise.
510
+ """
511
+ # Parse both calls
512
+ agent_parsed = parse_function_call(agent_call)
513
+ gt_parsed = parse_function_call(ground_truth_call)
514
+
515
+ if agent_parsed[0] is not None and gt_parsed[0] is not None:
516
+ return agent_parsed == gt_parsed
517
+ else:
518
+ return False
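A quick sketch of the parsing and comparison helpers on hand-written calls follows; the function names and arguments are invented and do not come from the Nexus datasets.

# Hedged sketch: illustrative calls only.
from camel.benchmarks.nexus import compare_function_calls, parse_function_call

name, args, kwargs = parse_function_call("searchCVE(keyword='openssl', limit=5)")
# -> ("searchCVE", [], {"keyword": "openssl", "limit": 5})

compare_function_calls(
    agent_call="lookup_ip(ip='8.8.8.8')",
    ground_truth_call="lookup_ip(ip='8.8.8.8')",
)  # True: same function name, args, and kwargs

compare_function_calls(
    agent_call="lookup_ip(ip='8.8.8.8')",
    ground_truth_call="lookup_ip(ip='1.1.1.1')",
)  # False: keyword arguments differ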
Paper2Poster/camel/benchmarks/ragbench.py ADDED
@@ -0,0 +1,333 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from typing import Any, Callable, Dict, List, Literal, Optional, Sequence
16
+
17
+ import numpy as np
18
+ from datasets import Dataset, load_dataset
19
+
20
+ from camel.agents import ChatAgent
21
+ from camel.benchmarks import BaseBenchmark
22
+ from camel.logger import get_logger
23
+ from camel.retrievers import AutoRetriever
24
+
25
+ logger = get_logger(__name__)
26
+
27
+
28
+ class RagasFields:
29
+ r"""Constants for RAGAS evaluation field names."""
30
+
31
+ INPUT_CONTEXT = "contexts"
32
+ INPUT_QUESTION = "question"
33
+ INPUT_ANSWER = "answer"
34
+
35
+
36
+ def annotate_dataset(
37
+ dataset: Dataset,
38
+ context_call: Optional[Callable[[Dict[str, Any]], List[str]]],
39
+ answer_call: Optional[Callable[[Dict[str, Any]], str]],
40
+ ) -> Dataset:
41
+ r"""Annotate the dataset by adding context and answers using the provided
42
+ functions.
43
+
44
+ Args:
45
+ dataset (Dataset): The input dataset to annotate.
46
+ context_call (Optional[Callable[[Dict[str, Any]], List[str]]]):
47
+ Function to generate context for each example.
48
+ answer_call (Optional[Callable[[Dict[str, Any]], str]]): Function to
49
+ generate an answer for each example.
50
+
51
+ Returns:
52
+ Dataset: The annotated dataset with added contexts and/or answers.
53
+ """
54
+
55
+ def process_example(example: Dict[str, Any]) -> Dict[str, Any]:
56
+ if context_call:
57
+ example["contexts"] = context_call(example)
58
+ if answer_call:
59
+ example["answer"] = answer_call(example)
60
+ return example
61
+
62
+ return dataset.map(process_example)
63
+
64
+
65
+ def rmse(
66
+ input_trues: Sequence[float],
67
+ input_preds: Sequence[float],
68
+ ) -> Optional[float]:
69
+ r"""Calculate Root Mean Squared Error (RMSE).
70
+
71
+ Args:
72
+ input_trues (Sequence[float]): Ground truth values.
73
+ input_preds (Sequence[float]): Predicted values.
74
+
75
+ Returns:
76
+ Optional[float]: RMSE value, or None if inputs have different lengths.
77
+ """
78
+ if len(input_trues) != len(input_preds):
79
+ logger.warning("Input lengths mismatch in RMSE calculation")
80
+ return None
81
+
82
+ trues = np.array(input_trues)
83
+ preds = np.array(input_preds, dtype=float)
84
+
85
+ # Ignore NaN values in predictions
86
+ eval_idx = ~np.isnan(preds)
87
+ if not np.any(eval_idx):
88
+ logger.warning("No valid predictions for RMSE calculation")
89
+ return None
90
+
91
+ trues = trues[eval_idx]
92
+ preds = preds[eval_idx]
93
+
94
+ return float(np.sqrt(np.mean((preds - trues) ** 2)))
95
+
96
+
97
+ def auroc(trues: Sequence[bool], preds: Sequence[float]) -> float:
98
+ r"""Calculate Area Under Receiver Operating Characteristic Curve (AUROC).
99
+
100
+ Args:
101
+ trues (Sequence[bool]): Ground truth binary values.
102
+ preds (Sequence[float]): Predicted probability values.
103
+
104
+ Returns:
105
+ float: AUROC score.
106
+ """
107
+ from sklearn.metrics import roc_auc_score # type: ignore[import-untyped]
108
+
109
+ eval_idx = ~np.isnan(preds)
110
+ if not np.any(eval_idx):
111
+ logger.warning("No valid predictions for AUROC calculation")
112
+ return 0.5 # Return random classifier score
113
+
114
+ return float(
115
+ roc_auc_score(np.array(trues)[eval_idx], np.array(preds)[eval_idx])
116
+ )
117
+
118
+
119
+ def ragas_calculate_metrics(
120
+ dataset: Dataset,
121
+ pred_context_relevance_field: Optional[str],
122
+ pred_faithfulness_field: Optional[str],
123
+ metrics_to_evaluate: Optional[List[str]] = None,
124
+ ground_truth_context_relevance_field: str = "relevance_score",
125
+ ground_truth_faithfulness_field: str = "adherence_score",
126
+ ) -> Dict[str, Optional[float]]:
127
+ r"""Calculate RAGAS evaluation metrics.
128
+
129
+ Args:
130
+ dataset (Dataset): The dataset containing predictions and ground truth.
131
+ pred_context_relevance_field (Optional[str]): Field name for predicted
132
+ context relevance.
133
+ pred_faithfulness_field (Optional[str]): Field name for predicted
134
+ faithfulness.
135
+ metrics_to_evaluate (Optional[List[str]]): List of metrics to evaluate.
136
+ ground_truth_context_relevance_field (str): Field name for ground truth
137
+ relevance.
138
+ ground_truth_faithfulness_field (str): Field name for ground truth
139
+ adherence.
140
+
141
+ Returns:
142
+ Dict[str, Optional[float]]: Dictionary of calculated metrics.
143
+ """
144
+ metrics_to_evaluate = metrics_to_evaluate or [
145
+ "context_relevancy",
146
+ "faithfulness",
147
+ ]
148
+ calculated_metrics: Dict[str, Optional[float]] = {}
149
+
150
+ if (
151
+ "context_relevancy" in metrics_to_evaluate
152
+ and pred_context_relevance_field
153
+ ):
154
+ trues_relevance = dataset[ground_truth_context_relevance_field]
155
+ preds_relevance = dataset[pred_context_relevance_field]
156
+ calculated_metrics["relevance_rmse"] = rmse(
157
+ trues_relevance, preds_relevance
158
+ )
159
+
160
+ if "faithfulness" in metrics_to_evaluate and pred_faithfulness_field:
161
+ trues_hallucination = ~np.array(
162
+ dataset[ground_truth_faithfulness_field]
163
+ )
164
+ preds_hallucination = 1 - np.array(
165
+ dataset[pred_faithfulness_field], dtype=float
166
+ )
167
+ calculated_metrics["hallucination_auroc"] = auroc(
168
+ trues_hallucination.tolist(), preds_hallucination.tolist()
169
+ )
170
+
171
+ return calculated_metrics
172
+
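+ # Worked example (illustrative values, not from the RAGBench data): with
+ # ground-truth adherence [True, False] and predicted faithfulness
+ # [0.9, 0.2], the hallucination labels become [False, True] and the
+ # hallucination scores become [0.1, 0.8]; the hallucinated answer gets
+ # the higher score, so auroc() returns 1.0 for this pair.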
173
+
174
+ def ragas_evaluate_dataset(
175
+ dataset: Dataset,
176
+ contexts_field_name: Optional[str],
177
+ answer_field_name: Optional[str],
178
+ metrics_to_evaluate: Optional[List[str]] = None,
179
+ ) -> Dataset:
180
+ r"""Evaluate the dataset using RAGAS metrics.
181
+
182
+ Args:
183
+ dataset (Dataset): Input dataset to evaluate.
184
+ contexts_field_name (Optional[str]): Field name containing contexts.
185
+ answer_field_name (Optional[str]): Field name containing answers.
186
+ metrics_to_evaluate (Optional[List[str]]): List of metrics to evaluate.
187
+
188
+ Returns:
189
+ Dataset: Dataset with added evaluation metrics.
190
+ """
191
+ from ragas import evaluate
192
+ from ragas.metrics import ( # type: ignore[import-untyped]
193
+ context_relevancy,
194
+ faithfulness,
195
+ )
196
+
197
+ metrics_to_evaluate = metrics_to_evaluate or [
198
+ "context_relevancy",
199
+ "faithfulness",
200
+ ]
201
+
202
+ # Rename fields if necessary
203
+ if (
204
+ contexts_field_name
205
+ and contexts_field_name != RagasFields.INPUT_CONTEXT
206
+ ):
207
+ dataset = dataset.rename_column(
208
+ contexts_field_name, RagasFields.INPUT_CONTEXT
209
+ )
210
+ if answer_field_name and answer_field_name != RagasFields.INPUT_ANSWER:
211
+ dataset = dataset.rename_column(
212
+ answer_field_name, RagasFields.INPUT_ANSWER
213
+ )
214
+
215
+ metrics = []
216
+ if "context_relevancy" in metrics_to_evaluate:
217
+ metrics.append(context_relevancy)
218
+ if "faithfulness" in metrics_to_evaluate:
219
+ metrics.append(faithfulness)
220
+
221
+ ragas_result = evaluate(dataset, metrics=metrics)
222
+ return Dataset.from_pandas(ragas_result.to_pandas())
223
+
224
+
225
+ class RAGBenchBenchmark(BaseBenchmark):
226
+ r"""RAGBench Benchmark for evaluating RAG performance.
227
+
228
+ This benchmark uses the rungalileo/ragbench dataset to evaluate
229
+ retrieval-augmented generation (RAG) systems. It measures context
230
+ relevancy and faithfulness metrics as described in
231
+ https://arxiv.org/abs/2407.11005.
232
+
233
+ Args:
234
+ processes (int, optional): Number of processes for parallel processing.
235
+ subset (str, optional): Dataset subset to use (e.g., "hotpotqa").
236
+ split (str, optional): Dataset split to use (e.g., "test").
237
+ """
238
+
239
+ def __init__(
240
+ self,
241
+ processes: int = 1,
242
+ subset: Literal[
243
+ "covidqa",
244
+ "cuad",
245
+ "delucionqa",
246
+ "emanual",
247
+ "expertqa",
248
+ "finqa",
249
+ "hagrid",
250
+ "hotpotqa",
251
+ "msmarco",
252
+ "pubmedqa",
253
+ "tatqa",
254
+ "techqa",
255
+ ] = "hotpotqa",
256
+ split: Literal["train", "test", "validation"] = "test",
257
+ ) -> None:
258
+ super().__init__("ragbench", "rag_bench", "", processes)
259
+ self.subset = subset
260
+ self.split = split
261
+ self.dataset: Optional[Dataset] = None
262
+
263
+ def download(self):
264
+ r"""Download the RAGBench dataset."""
265
+ try:
266
+ self.dataset = load_dataset(
267
+ "rungalileo/ragbench", self.subset, split=self.split
268
+ )
269
+ except Exception as e:
270
+ logger.error(f"Failed to download dataset: {e}")
271
+ raise
272
+
273
+ def load(self, force_download: bool = False):
274
+ r"""Load the RAGBench dataset.
275
+
276
+ Args:
277
+ force_download (bool, optional): Whether to force download the
278
+ data.
279
+ """
280
+ if force_download or self.dataset is None:
281
+ logger.info(
282
+ "%s dataset",
283
+ "Force downloading" if force_download else "Loading",
284
+ )
285
+ self.download()
286
+
287
+ def run( # type: ignore[override, return]
288
+ self,
289
+ agent: ChatAgent,
290
+ auto_retriever: AutoRetriever,
291
+ ) -> Dict[str, Optional[float]]:
292
+ r"""Run the benchmark evaluation.
293
+
294
+ Args:
295
+ agent (ChatAgent): Chat agent for generating answers.
296
+ auto_retriever (AutoRetriever): Retriever for finding relevant
297
+ contexts.
298
+
299
+ Returns:
300
+ Dict[str, Optional[float]]: Dictionary of evaluation metrics.
301
+ """
302
+
+ # Lazily load the dataset so run() also works when load() has not been
+ # called first; annotate_dataset() would otherwise receive None.
+ if self.dataset is None:
+ self.load()
+
303
+ def context_call(example):
304
+ retrieved_info = auto_retriever.run_vector_retriever(
305
+ query=example['question'],
306
+ contents=example['documents'],
307
+ top_k=1,
308
+ return_detailed_info=True,
309
+ similarity_threshold=0.5,
310
+ )
311
+ return [c['text'] for c in retrieved_info['Retrieved Context']]
312
+
313
+ def answer_call(example: Dict[str, Any]) -> str:
314
+ user_msg = str(example)
315
+ assistant_response = agent.step(user_msg)
316
+ return assistant_response.msg.content
317
+
318
+ # Annotate the dataset
319
+ annotated_ds = annotate_dataset(
320
+ self.dataset, context_call, answer_call
321
+ )
322
+ evaluated_ds = ragas_evaluate_dataset(
323
+ annotated_ds,
324
+ contexts_field_name="contexts",
325
+ answer_field_name="answer",
326
+ metrics_to_evaluate=["context_relevancy", "faithfulness"],
327
+ )
328
+
329
+ return ragas_calculate_metrics(
330
+ evaluated_ds,
331
+ pred_context_relevance_field="context_relevancy",
332
+ pred_faithfulness_field="faithfulness",
333
+ )
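
A minimal usage sketch for the benchmark added above (the agent and retriever construction are illustrative assumptions; any ChatAgent and AutoRetriever configuration supported by the codebase should work):

    from camel.agents import ChatAgent
    from camel.benchmarks.ragbench import RAGBenchBenchmark
    from camel.retrievers import AutoRetriever

    # Illustrative setup: the constructor arguments below are assumptions,
    # not prescribed by ragbench.py.
    agent = ChatAgent("You answer questions using the provided documents.")
    retriever = AutoRetriever()

    benchmark = RAGBenchBenchmark(subset="hotpotqa", split="test")
    benchmark.load()  # downloads rungalileo/ragbench via load_dataset
    metrics = benchmark.run(agent, retriever)
    print(metrics)  # e.g. {'relevance_rmse': ..., 'hallucination_auroc': ...}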