Spaces:
Sleeping
Sleeping
| import os | |
| from datetime import datetime | |
| import subprocess | |
def _safe_run(cmd: list[str], timeout: int = 600) -> None:
    """Run *cmd* as a child process, logging and re-raising any failure.

    The child inherits this process's stdout/stderr, so its output is shown
    live rather than captured.

    Args:
        cmd: Argument vector passed to ``subprocess.run`` (no shell).
        timeout: Maximum run time in seconds before the child is killed.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status.
        subprocess.TimeoutExpired: The command ran longer than *timeout*.
    """
    print(f"π Running: {' '.join(cmd)}")
    try:
        subprocess.run(cmd, check=True, timeout=timeout)
    except subprocess.CalledProcessError as e:
        # stderr is not captured (it streams straight to the console), so
        # ``e.stderr`` is always None here; the child's own error output has
        # already been printed above this message.
        print(f"[β ERROR] Subprocess failed: {e}")
        raise
    except subprocess.TimeoutExpired:
        print(f"[β TIMEOUT] Command timed out: {' '.join(cmd)}")
        raise


def run_hero_reranking(user_id: str, end_date: str) -> dict[str, str]:
    """Run the retrieval and reranking steps for one user/date run.

    All paths live under ``outputs/{user_id}_{end_date}`` relative to the
    current working directory. The ``hero`` sub-directory is created if it
    is missing. Each step is skipped when its output file already exists,
    so an interrupted run can be resumed by calling this function again.

    Args:
        user_id: Identifier used as the first part of the run directory name.
        end_date: Date string used as the second part of the run directory
            name (the script's entry point passes ``"20250604"``).

    Returns:
        A dict with the keys ``"hyde"``, ``"retrieved"`` and ``"reranked"``
        mapping to the corresponding JSON file paths.

    Raises:
        subprocess.CalledProcessError: A step's script exited non-zero.
        subprocess.TimeoutExpired: A step exceeded the 600 second limit.
    """
    base_dir = f"outputs/{user_id}_{end_date}"
    hero_dir = os.path.join(base_dir, "hero")
    os.makedirs(hero_dir, exist_ok=True)

    # This file is only read here (as retrieval input); it is presumably
    # written by an earlier pipeline step -- verify against the caller.
    hyde_output = os.path.join(hero_dir, "manifesto_icl_hyde_fc.json")

    # Step 3.2: retrieval
    print("π Step 3.2: Retrieval from knowledge store ...")
    knowledge_store_dir = os.path.join(base_dir, "augmented_data_store")
    retrieval_output = os.path.join(
        hero_dir, "manifesto_icl_retrieval_top_k_QA.json"
    )
    if not os.path.exists(retrieval_output):
        _safe_run([
            "python3.12", "baseline/retrieval_optimized.py",
            "--knowledge_store_dir", knowledge_store_dir,
            "--target_data", hyde_output,
            "--json_output", retrieval_output,
        ])

    # Step 3.3: reranking
    print("π·οΈ Step 3.3: Reranking retrieved evidence ...")
    rerank_output = os.path.join(
        hero_dir, "manifesto_icl_reranking_top_k_QA.json"
    )
    if not os.path.exists(rerank_output):
        _safe_run([
            "python3.12", "baseline/reranking_optimized.py",
            "--target_data", retrieval_output,
            "--json_output", rerank_output,
        ])

    return {
        "hyde": hyde_output,
        "retrieved": retrieval_output,
        "reranked": rerank_output,
    }
if __name__ == "__main__":
    # Script entry point: run the retrieval + reranking steps for a sample
    # user/date and list the resulting artifact paths.
    # The original call targeted `run_step3_hero_pipeline`, which is not
    # defined in this file; `run_hero_reranking` is the matching entry point.
    output_files = run_hero_reranking(user_id="xxx", end_date="20250604")
    for key, path in output_files.items():
        print(f"β {key}: {path}")