File size: 2,378 Bytes
fcaa164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Rebuild PPTX files by replaying the actions saved in each agent_steps.jsonl.
import os
import shutil
import sys
from copy import deepcopy
from glob import glob

import func_argparse
import jsonlines
import tqdm

from apis import CodeExecutor, HistoryMark
from presentation import Presentation
from utils import Config, pjoin, ppt_to_images

# Shared Config instance handed to Presentation.from_file wherever this
# script loads a source.pptx.
# NOTE(review): "/tmp" is presumably a scratch/working directory -- confirm
# against utils.Config.
config = Config("/tmp")
# Module-level executor that rebuild_pptx uses to replay recorded actions
# onto slide copies.
# NOTE(review): the meaning of the 0 argument is not visible in this file --
# confirm against apis.CodeExecutor.
code_executor = CodeExecutor(0)


def rebuild_pptx(agent_steps: str, prs: Presentation):
    """Replay the successful steps of a recorded run onto copies of slides.

    Args:
        agent_steps: Path to a JSON Lines file whose records each unpack as
            ``(mark, slide_idx, actions)``.
        prs: Source presentation; ``prs.slides`` is indexed with the 1-based
            ``slide_idx`` of each record.

    Returns:
        A list holding one deep-copied, edited slide per record marked
        ``HistoryMark.API_CALL_CORRECT``, in file order.

    Raises:
        ValueError: If the file contains no records (the file is deleted
            before raising), or if the last record is not marked
            ``HistoryMark.API_CALL_CORRECT``.
        AssertionError: If ``code_executor.execute_actions`` returns anything
            other than ``None`` for a replayed record; the returned feedback
            is the exception argument.
    """
    # Read everything inside a context manager so the reader is closed before
    # the file may be removed below (removing an open file fails on Windows)
    # and so the handle is not leaked.
    with jsonlines.open(agent_steps) as reader:
        steps = list(reader)
    if not steps:
        os.remove(agent_steps)
        raise ValueError(f"Jump {agent_steps} as no steps")
    if steps[-1][0] != HistoryMark.API_CALL_CORRECT:
        raise ValueError(f"Jump {agent_steps} as last step is failed")

    slides = []
    for mark, slide_idx, actions in steps:
        if mark != HistoryMark.API_CALL_CORRECT:
            continue
        # Work on a copy so the source presentation is never modified.
        slide = deepcopy(prs.slides[slide_idx - 1])  # slide_idx starts from 1
        feedback = code_executor.execute_actions(actions, slide)
        if feedback is not None:
            # Raised explicitly instead of via `assert`, so the check is not
            # stripped when Python runs with -O.
            raise AssertionError(feedback)
        slides.append(slide)
    return slides


def rebuild_all(
    setting: str = "*", topic: str = "*", out_filename: str = "rebuild.pptx"
):
    """Rebuild a presentation for every recorded run found under ``data/``.

    Every folder matching ``data/{topic}/pptx/*`` (relative to the current
    working directory) has its ``source.pptx`` loaded once. For each
    ``agent_steps.jsonl`` matching ``{folder}/{setting}/*/``, the recorded
    steps are replayed with ``rebuild_pptx`` and the result is saved next to
    the steps file as ``out_filename``. Runs whose output file already exists
    are skipped.

    Args:
        setting: Glob pattern for the setting directory inside each folder.
        topic: Glob pattern for the topic directory under ``data/``.
        out_filename: File name of the rebuilt presentation written beside
            each ``agent_steps.jsonl``.

    A run that fails to rebuild or save is skipped (best effort), and the
    failure is reported on stderr rather than being discarded silently.
    """
    for folder in tqdm.tqdm(glob(f"data/{topic}/pptx/*")):
        prs = Presentation.from_file(pjoin(folder, "source.pptx"), config)
        # One copy per folder serves as the output container; only its slides
        # are replaced for each run.
        pptx_container = deepcopy(prs)
        for agent_steps in glob(pjoin(folder, setting, "*", "agent_steps.jsonl")):
            dst = pjoin(os.path.dirname(agent_steps), out_filename)
            if os.path.exists(dst):
                continue
            try:
                pptx_container.slides = rebuild_pptx(agent_steps, prs)
                pptx_container.save(dst)
            except Exception as e:
                # Keep going with the remaining runs, but leave a trace;
                # tqdm.write prints without corrupting the progress bar.
                tqdm.tqdm.write(f"Skip {agent_steps}: {e!r}", file=sys.stderr)


if __name__ == "__main__":
    if len(sys.argv) == 1:
        # No CLI arguments: replay one hard-coded run as a quick manual check.
        # Start from an empty ./test directory.
        shutil.rmtree("./test", ignore_errors=True)
        os.makedirs("./test", exist_ok=True)

        source_folder = (
            "data/education/pptx/Open Science - PhD Human Rights - 2021 - module 3"
        )
        setting = "PPTCrew-Qwen2.5+Qwen2.5+Qwen2-VL"
        pdf = "37-105-1-PB (3)"

        prs = Presentation.from_file(pjoin(source_folder, "source.pptx"), config)
        container = deepcopy(prs)
        steps_path = pjoin(source_folder, setting, pdf, "agent_steps.jsonl")
        # NOTE(review): in the visible code the rebuilt container is never
        # saved and nothing is written into ./test -- confirm this is intended.
        container.slides = rebuild_pptx(steps_path, prs)
    else:
        # Any CLI arguments: expose rebuild_all through func_argparse.
        func_argparse.main(rebuild_all)