|
|
""" |
|
|
Content planner for project page generation. |
|
|
Plans the structure and content organization for the project page. |
|
|
""" |
|
|
|
|
|
import json |
|
|
import yaml |
|
|
import os |
|
|
from jinja2 import Environment, StrictUndefined |
|
|
from camel.models import ModelFactory |
|
|
from camel.agents import ChatAgent |
|
|
from utils.wei_utils import account_token |
|
|
from utils.src.utils import get_json_from_response |
|
|
from camel.messages import BaseMessage |
|
|
from rich import print |
|
|
from rich.pretty import Pretty |
|
|
import base64 |
|
|
from camel.messages import BaseMessage |
|
|
from camel.models import ModelFactory |
|
|
|
|
|
def filter_references(md_content: str) -> str:
    """Return ``md_content`` cut off just before its references heading.

    The text is scanned line by line; the first line that, once stripped and
    lower-cased, starts with ``"## references"`` marks the cut.  That line and
    everything after it are discarded.  When no such line exists the content
    is returned with its lines re-joined by ``"\\n"``.

    Args:
        md_content: Markdown text to truncate.

    Returns:
        The retained lines joined with newline characters.
    """
    all_lines = md_content.splitlines()

    # Index of the first references heading, or "past the end" if there is none.
    cut_at = next(
        (
            position
            for position, text in enumerate(all_lines)
            if text.strip().lower().startswith("## references")
        ),
        len(all_lines),
    )
    return "\n".join(all_lines[:cut_at])
|
|
|
|
|
class ProjectPageContentPlanner:
    """Plans the content structure and organization for project pages.

    The planner owns two chat agents built from the same model configuration:
    ``planner_agent`` generates and revises content while ``reviewer_agent``
    critiques it.  Prompts are rendered from Jinja2 templates stored in YAML
    files, and every intermediate result is written as JSON under
    ``project_contents/``.
    """

    # Directory holding the YAML prompt templates (relative to the working directory).
    _TEMPLATE_DIR = 'utils/prompt_templates/page_templates'

    # Directory receiving every JSON artefact written by this class.
    _OUTPUT_DIR = 'project_contents'

    # (model names, environment variable holding the matching API key).
    _API_KEY_ENV_BY_MODEL = (
        (('4o', '4o-mini', 'gpt-4.1', 'gpt-4.1-mini', 'o1', 'o3', 'o3-mini'), 'OPENAI_API_KEY'),
        (('gemini', 'gemini-2.5-pro', 'gemini-2.5-flash'), 'GEMINI_API_KEY'),
        (('qwen', 'qwen-plus', 'qwen-max', 'qwen-long'), 'QWEN_API_KEY'),
        (('zhipuai',), 'ZHIPUAI_API_KEY'),
    )

    # Text appended to the user's feedback before it is sent back to the planner.
    _FEEDBACK_SUFFIX = (
        "The above is human feedback. Please make modifications based on this "
        "feedback and the original content.The output format is as specified above."
    )

    # Example of the expected section-plan output, handed to the section
    # generation template as ``json_format_example``.
    _SECTION_FORMAT_EXAMPLE = (
        '\n```json\n'
        '{\n'
        '    "Introduction": "Brief overview of the paper\'s main topic and objectives.",\n'
        '    "Methodology": "Description of the methods used in the research.",\n'
        '    "Results": "Summary of the key findings and results."\n'
        '}\n'
        '```\n'
    )

    def __init__(self, agent_config, args):
        """Store the configuration, build both agents and create the output directory.

        Args:
            agent_config: Mapping read for ``model_platform``, ``model_type``,
                ``model_config`` and, optionally, ``url`` and ``token_limit``.
            args: Run options; this class reads ``model_name_t``,
                ``paper_name`` and ``human_input`` from it.
        """
        self.agent_config = agent_config
        self.args = args
        self.planner_agent = self._create_planner_agent()
        self.reviewer_agent = self._create_reviewer_agent()
        os.makedirs(self._OUTPUT_DIR, exist_ok=True)

    # ------------------------------------------------------------------
    # Agent construction
    # ------------------------------------------------------------------

    def _resolve_api_key(self):
        """Return the API key matching ``self.args.model_name_t``.

        The key is read from the environment variable associated with the
        model name; names starting with ``openrouter_`` use
        ``OPENROUTER_API_KEY``.

        Returns:
            The environment value, or ``None`` when the model name is not
            recognised or the variable is unset.
        """
        model_name = self.args.model_name_t
        for known_names, env_var in self._API_KEY_ENV_BY_MODEL:
            if model_name in known_names:
                return os.environ.get(env_var)
        if model_name.startswith('openrouter_'):
            return os.environ.get('OPENROUTER_API_KEY')
        return None

    def _create_model(self):
        """Create the backend model described by ``self.agent_config``.

        ``url`` is forwarded to ``ModelFactory.create`` only when the
        configured model type contains ``vllm`` (case-insensitive).
        """
        config = self.agent_config
        create_kwargs = {
            'model_platform': config['model_platform'],
            'model_type': config['model_type'],
            'model_config_dict': config['model_config'],
            'api_key': self._resolve_api_key(),
        }
        if 'vllm' in str(config['model_type']).lower():
            create_kwargs['url'] = config.get('url', None)
        return ModelFactory.create(**create_kwargs)

    def _create_agent(self, system_message):
        """Build a ``ChatAgent`` around a freshly created model."""
        return ChatAgent(
            system_message=system_message,
            model=self._create_model(),
            message_window_size=10,
            token_limit=self.agent_config.get('token_limit', None),
        )

    def _create_planner_agent(self):
        """Create the content planning (generation) agent."""
        system_message = """You are a helpful academic expert and web developer, who is specialized in generating a paper project page, from given research paper's contents and figures."""
        return self._create_agent(system_message)

    def _create_reviewer_agent(self):
        """Create the agent that reviews generated project page content."""
        reviewer_system = (
            "You are a precise, constructive reviewer of generated project pages. "
        )
        return self._create_agent(reviewer_system)

    # ------------------------------------------------------------------
    # Prompt rendering
    # ------------------------------------------------------------------

    def _load_template_source(self, template_file):
        """Return the ``template`` entry of a YAML file in the template directory."""
        # Explicit UTF-8: prompt templates must not depend on the platform's
        # default text encoding.
        with open(f'{self._TEMPLATE_DIR}/{template_file}', 'r', encoding='utf-8') as f:
            planner_config = yaml.safe_load(f)
        return planner_config["template"]

    @staticmethod
    def _render_template(template_str, jinja_args):
        """Render ``template_str`` with ``jinja_args``; undefined variables raise."""
        jinja_env = Environment(undefined=StrictUndefined)
        return jinja_env.from_string(template_str).render(**jinja_args)

    def _render_prompt(self, template_file, jinja_args):
        """Load ``template_file`` and render it with ``jinja_args``."""
        return self._render_template(self._load_template_source(template_file), jinja_args)

    def _render_generation_prompt(self, paper_content, figures, text_page_content, template_str):
        """Render the full-content generation prompt from ``template_str``.

        ``figures`` and ``text_page_content`` are serialised to indented JSON;
        ``paper_content`` is passed to the template unchanged.
        """
        return self._render_template(template_str, {
            'paper_content': paper_content,
            'figures': json.dumps(figures, indent=2),
            'project_page_content': json.dumps(text_page_content, indent=2),
        })

    def _build_reviewer_prompt(self, paper_content, figures, text_page_content, generated_json):
        """Render the review prompt for the currently generated content.

        Args:
            paper_content: Paper text given to the template as-is.
            figures: Mapping with ``images`` and ``tables`` entries; each is
                serialised to indented JSON.
            text_page_content: Accepted for interface compatibility; not used.
            generated_json: Generated content, passed to the template without
                serialisation.

        Returns:
            The rendered prompt string.
        """
        return self._render_prompt('full_content_review.yaml', {
            'paper_content': paper_content,
            'figures': json.dumps(figures['images'], indent=2),
            'tables': json.dumps(figures['tables'], indent=2),
            "generated_content": generated_json,
        })

    def _build_revision_prompt(self, review_json):
        """Render the revision prompt that carries only the review."""
        return self._render_prompt('full_content_revise.yaml', {
            "review_content": json.dumps(review_json, indent=2),
        })

    def _build_revision_prompt_with_resume(self, review_json, current_content, figures):
        """Render the revision prompt used when resuming at the review stage.

        Unlike ``_build_revision_prompt`` this prompt also carries the current
        content and the figures.  The template is loaded before the review is
        echoed to the console, matching the original statement order.
        """
        template_str = self._load_template_source('full_content_revise_with_resume.yaml')

        print(review_json)

        return self._render_template(template_str, {
            "review_content": json.dumps(review_json, indent=2),
            "figures": json.dumps(figures, indent=2),
            "current_content": current_content,
        })

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    def _save_json(self, file_name, payload, indent=2, ensure_ascii=False):
        """Write ``payload`` as JSON into the output directory and return the path."""
        path = f'{self._OUTPUT_DIR}/{file_name}'
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=ensure_ascii, indent=indent)
        return path

    @staticmethod
    def _show(content):
        """Pretty-print ``content`` between two separator lines."""
        print('-'*50)
        print(Pretty(content, expand_all=True))
        print('-'*50)

    def _refine_with_human_feedback(self, content, first_question, next_question=None):
        """Interactively revise ``content`` until the user answers ``yes``.

        Args:
            content: Content shown to the user first.
            first_question: Prompt passed to ``input`` for the first answer.
            next_question: Prompt for later rounds; defaults to
                ``first_question``.

        Returns:
            tuple: (final_content, input_tokens, output_tokens), where the
            token counts cover only the feedback rounds run here.
        """
        if next_question is None:
            next_question = first_question

        in_total, out_total = 0, 0
        self._show(content)
        user_feedback = input(first_question)
        while user_feedback.lower() != 'yes':
            message = BaseMessage.make_assistant_message(
                role_name='User',
                content='human feedback' + user_feedback + self._FEEDBACK_SUFFIX,
            )
            response = self.planner_agent.step(message)

            # Account inside the loop so every feedback round is counted
            # exactly once, and nothing is counted when the user accepts
            # the first proposal.
            in_tok, out_tok = account_token(response)
            in_total += in_tok
            out_total += out_tok

            content = get_json_from_response(response.msgs[0].content)
            self._show(content)
            user_feedback = input(next_question)
        return content, in_total, out_total

    @staticmethod
    def _create_dynamic_page_dict(sections: dict[str, str]) -> dict[str, str]:
        """Return the fixed header fields followed by ``sections``.

        Keys in ``sections`` that collide with a header field replace it.
        """
        poster_dict = {
            "title": "Title of the paper",
            "authors": "Authors of the paper, Each author must be accompanied by the superscript number(s) of their corresponding affiliation(s).",
            "affiliation": "Affiliation of the authors, each affiliation must be accompanied by the corresponding superscript number.",
        }
        poster_dict.update(sections)
        return poster_dict

    @staticmethod
    def _remove_items_without_section(data: dict) -> dict:
        """Drop image/table entries that lack an ``original_section``.

        ``data`` is modified in place and returned.  Only ``images`` and
        ``tables`` values that are dicts are filtered.  Entries that are not
        dicts themselves are dropped as well, because they cannot carry an
        ``original_section``.
        """
        for key in ("images", "tables"):
            if key in data and isinstance(data[key], dict):
                data[key] = {
                    k: v for k, v in data[key].items()
                    if isinstance(v, dict) and v.get("original_section") is not None
                }
        return data

    # ------------------------------------------------------------------
    # Pipeline stages
    # ------------------------------------------------------------------

    def full_content_generation(
        self,
        args,
        paper_content,
        figures,
        generated_section,
        text_page_content,
    ):
        """
        Plan + Generate -> Review -> Revise

        When ``args.resume`` is ``'parse_pdf'`` or ``'generate_content'`` the
        initial content is generated and saved as version ``v0``; otherwise
        the saved ``v0`` file is loaded.  ``args.full_content_check_times``
        review/revise rounds follow, then optional human feedback (when
        ``self.args.human_input == '1'``), and the final content is saved.

        Args:
            args: run options; ``resume`` and ``full_content_check_times`` are read
            paper_content: parsed paper content
            figures: mapping with ``images`` and ``tables`` entries
            generated_section: format_instructions / schema hints (not used here)
            text_page_content: initial text-only page structure

        Returns:
            tuple: (final_generated_content_json, input_token_total, output_token_total)
        """
        paper_name = self.args.paper_name

        if args.resume in ['parse_pdf', 'generate_content']:
            print("full content generation start")

            prompt = self._render_generation_prompt(
                paper_content,
                figures,
                text_page_content,
                self._load_template_source('full_content_generation.yaml'),
            )

            self.planner_agent.reset()
            response = self.planner_agent.step(prompt)
            total_in_tok, total_out_tok = account_token(response)

            current_output = get_json_from_response(response.msgs[0].content)

            first_path = self._save_json(
                f'{paper_name}_generated_full_content.v0.json', current_output
            )
            print(f" - Initial generation saved: {first_path}")
        else:
            print("Skipping initial full content generation, loading existing content.")
            v0_path = f'{self._OUTPUT_DIR}/{paper_name}_generated_full_content.v0.json'
            with open(v0_path, 'r', encoding='utf-8') as f:
                current_output = json.load(f)
            total_in_tok, total_out_tok = 0, 0

        for it in range(0, args.full_content_check_times):
            # The reviewer starts every round from a clean conversation.
            self.reviewer_agent.reset()

            review_prompt = self._build_reviewer_prompt(
                paper_content=paper_content,
                figures=figures,
                text_page_content=text_page_content,
                generated_json=current_output,
            )
            review_resp = self.reviewer_agent.step(review_prompt)
            rin, rout = account_token(review_resp)

            review_json = get_json_from_response(review_resp.msgs[0].content)

            review_path = self._save_json(f'{paper_name}_review.iter{it}.json', review_json)
            print(f" - Review saved: {review_path}")

            total_in_tok += rin
            total_out_tok += rout

            # The planner is not reset before revising.  When resuming at the
            # review stage, the current content and figures are passed in the
            # prompt explicitly.
            if args.resume != 'full_content_check':
                revision_prompt = self._build_revision_prompt(
                    review_json=review_json
                )
            else:
                revision_prompt = self._build_revision_prompt_with_resume(
                    review_json=review_json,
                    current_content=current_output,
                    figures=figures,
                )
            rev_resp = self.planner_agent.step(revision_prompt)
            rin2, rout2 = account_token(rev_resp)

            revised_output = get_json_from_response(rev_resp.msgs[0].content)

            out_path = self._save_json(
                f'{paper_name}_generated_full_content.v{it+1}.json', revised_output
            )
            print(f" - Revised generation saved: {out_path}")

            total_in_tok += rin2
            total_out_tok += rout2
            current_output = revised_output

        if self.args.human_input == '1':
            current_output, fb_in_tok, fb_out_tok = self._refine_with_human_feedback(
                current_output,
                'The above is the final generated full content! If you are satisfied with the generated content, enter yes\n If not, enter your feedback.\n',
                'The above is the final generated full content! If you are satisfied with the generated content, enter yes. \n If not, enter your feedback.\n',
            )
            total_in_tok += fb_in_tok
            total_out_tok += fb_out_tok

        final_path = self._save_json(
            f'{paper_name}_generated_full_content.json', current_output
        )
        print(f"full content generation completed. Tokens: {total_in_tok} -> {total_out_tok}")
        print(f" - Final content: {final_path}")

        return current_output, total_in_tok, total_out_tok

    def section_generation(self, paper_content, figures):
        """
        Plan the section structure for the project page.

        Args:
            paper_content: Parsed paper content
            figures: Accepted for interface compatibility; not used here.

        Returns:
            tuple: (section_plan_dict, input_tokens, output_tokens).  The plan
            always starts with the ``title``, ``authors`` and ``affiliation``
            fields, followed by the generated sections.
        """
        # NOTE(review): the original passed ``json.dumps(paper_content)`` under
        # the ``json_format_example`` key and left the example it had built
        # unused.  The example is passed here instead; confirm against
        # section_generation.yaml that this is what the template expects.
        prompt = self._render_prompt('section_generation.yaml', {
            'paper_content': paper_content,
            'json_format_example': self._SECTION_FORMAT_EXAMPLE,
        })

        self.planner_agent.reset()
        response = self.planner_agent.step(prompt)
        input_token, output_token = account_token(response)
        generated_section = get_json_from_response(response.msgs[0].content)

        if self.args.human_input == '1':
            question = 'The above is the generated section! If you are satisfied with the generated section, enter yes. \nIf not, enter your feedback.\n'
            generated_section, fb_in_tok, fb_out_tok = self._refine_with_human_feedback(
                generated_section, question
            )
            input_token += fb_in_tok
            output_token += fb_out_tok

        print(f"section planning completed. Tokens: {input_token} -> {output_token}")

        generated_section = self._create_dynamic_page_dict(generated_section)

        generated_path = self._save_json(
            f'{self.args.paper_name}_generated_section.json',
            generated_section,
            indent=4,
            ensure_ascii=True,
        )
        print(f" - Generated section plan: {generated_path}")

        return generated_section, input_token, output_token

    def text_content_generation(self, paper_content, figures, generated_section):
        """
        Generate the text content of the project page.

        Args:
            paper_content: Parsed paper content
            figures: Mapping whose ``images`` and ``tables`` values are dicts
                of per-item dicts; each item is sent without its ``tag`` key.
            generated_section: Section plan, serialised to JSON and passed to
                the template as ``format_instructions``.

        Returns:
            tuple: (generated_text_content, input_tokens, output_tokens)
        """
        # Flatten each figure mapping to a list and strip the ``tag`` field
        # before sending the figures to the model.
        figures_ = {
            kind: [
                {k: v for k, v in entry.items() if k != 'tag'}
                for entry in figures[kind].values()
            ]
            for kind in ('images', 'tables')
        }

        prompt = self._render_prompt('text_content_generation.yaml', {
            'paper_content': paper_content,
            'figures': json.dumps(figures_, indent=2),
            'format_instructions': json.dumps(generated_section, indent=2),
        })

        self.planner_agent.reset()
        response = self.planner_agent.step(prompt)
        input_token, output_token = account_token(response)

        generated_text_content = get_json_from_response(response.msgs[0].content)

        print(f"text content generation completed. Tokens: {input_token} -> {output_token}")

        generated_path = self._save_json(
            f'{self.args.paper_name}_generated_text_content.json',
            generated_text_content,
            indent=4,
            ensure_ascii=True,
        )
        print(f" - Generated text content: {generated_path}")

        return generated_text_content, input_token, output_token

    def filter_raw_content(self, paper_content, figures):
        """
        Strip the references from the paper and let the model filter the figures.

        Args:
            paper_content: Paper text; everything from its references heading
                on is removed via ``filter_references``.
            figures: Figure data, serialised to JSON for the prompt.

        Returns:
            tuple: (filtered_paper_content, filtered_figures, input_tokens,
            output_tokens)
        """
        paper_content = filter_references(paper_content)

        prompt = self._render_prompt('filter_figures.yaml', {
            'paper_content': paper_content,
            'figures': json.dumps(figures, indent=2),
        })

        self.planner_agent.reset()
        response = self.planner_agent.step(prompt)
        input_token, output_token = account_token(response)
        filtered_figures = get_json_from_response(response.msgs[0].content)

        filtered_figures = self._remove_items_without_section(filtered_figures)

        print(f"filtered figures generation completed. Tokens: {input_token} -> {output_token}")

        generated_path = self._save_json(
            f'{self.args.paper_name}_generated_filtered_figures.json',
            filtered_figures,
            indent=4,
            ensure_ascii=True,
        )
        print(f" - Generated filtered figures: {generated_path}")

        return paper_content, filtered_figures, input_token, output_token
|
|
|
|
|
|
|
|
|