yulongchen committed on
Commit
b2a8297
·
1 Parent(s): bf32721

Add system

Browse files
Files changed (3) hide show
  1. .DS_Store +0 -0
  2. system/.DS_Store +0 -0
  3. system/hero_QA.py +0 -60
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
system/.DS_Store CHANGED
Binary files a/system/.DS_Store and b/system/.DS_Store differ
 
system/hero_QA.py DELETED
@@ -1,60 +0,0 @@
1
- import os
2
- from datetime import datetime
3
- import subprocess
4
-
5
-
6
def _safe_run(cmd, timeout=600):
    """Run *cmd* as a subprocess, logging any failure before re-raising it.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell is used).
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status.
        subprocess.TimeoutExpired: The command ran longer than *timeout*.
    """
    printable = " ".join(cmd)
    print(f"Running: {printable}")
    try:
        subprocess.run(cmd, check=True, timeout=timeout)
    except subprocess.CalledProcessError as e:
        print(f"[❌ ERROR] Subprocess failed: {e}")
        # stderr is only populated when output is captured, and it is bytes
        # or str depending on text mode, so handle both instead of assuming
        # bytes.
        if e.stderr:
            stderr = e.stderr
            if isinstance(stderr, bytes):
                stderr = stderr.decode(errors="replace")
            print("[stderr]:", stderr)
        raise
    except subprocess.TimeoutExpired:
        print(f"[❌ TIMEOUT] Command timed out: {printable}")
        raise


def run_hero_reranking(user_id, end_date, *, python_executable="python3.12", timeout=600):
    """Run the retrieval and reranking steps for one user/date output folder.

    All paths live under ``outputs/{user_id}_{end_date}``; the ``hero``
    sub-directory is created if missing.  Each step is skipped when its
    output file already exists.  The step scripts are referenced by relative
    path (``baseline/...``), so they are resolved against the current working
    directory.

    NOTE(review): a step is skipped purely on file existence, so an output
    left behind by an interrupted run would be reused as-is -- confirm the
    step scripts write their output atomically.

    Args:
        user_id: Identifier used to build the output directory name.
        end_date: Date string used to build the output directory name.
        python_executable: Interpreter used to launch the step scripts.
        timeout: Per-step timeout in seconds.

    Returns:
        A dict with the paths of the ``"hyde"``, ``"retrieved"`` and
        ``"reranked"`` JSON files.

    Raises:
        subprocess.CalledProcessError: A step script exited with a non-zero
            status.
        subprocess.TimeoutExpired: A step script exceeded *timeout*.
    """
    base_dir = f"outputs/{user_id}_{end_date}"
    hero_dir = os.path.join(base_dir, "hero")
    os.makedirs(hero_dir, exist_ok=True)

    # Input of the retrieval step; this function only reads its path.
    hyde_output = os.path.join(hero_dir, "manifesto_icl_hyde_fc.json")

    # Step 3.2: retrieval
    print("🔍 Step 3.2: Retrieval from knowledge store ...")
    knowledge_store_dir = os.path.join(base_dir, "augmented_data_store")
    retrieval_output = os.path.join(hero_dir, "manifesto_icl_retrieval_top_k_QA.json")

    if not os.path.exists(retrieval_output):
        _safe_run(
            [
                python_executable, "baseline/retrieval_optimized.py",
                "--knowledge_store_dir", knowledge_store_dir,
                "--target_data", hyde_output,
                "--json_output", retrieval_output,
            ],
            timeout=timeout,
        )

    # Step 3.3: reranking
    print("🏷️ Step 3.3: Reranking retrieved evidence ...")
    rerank_output = os.path.join(hero_dir, "manifesto_icl_reranking_top_k_QA.json")

    if not os.path.exists(rerank_output):
        _safe_run(
            [
                python_executable, "baseline/reranking_optimized.py",
                "--target_data", retrieval_output,
                "--json_output", rerank_output,
            ],
            timeout=timeout,
        )

    return {
        "hyde": hyde_output,
        "retrieved": retrieval_output,
        "reranked": rerank_output,
    }
55
-
56
-
57
if __name__ == "__main__":
    # Manual run with placeholder arguments.  The entry point defined in this
    # file is run_hero_reranking; the previously called name does not exist
    # here and raised NameError.
    output_files = run_hero_reranking(user_id="xxx", end_date="20250604")
    for key, path in output_files.items():
        print(f"✅ {key}: {path}")