Commit fcd14e1 · Parent(s): 44ef3dd
Add system
Files changed:
- app.py +4 -61
- system/augmented_searching.py +1 -3
- system/baseline/reranking_optimized.py +2 -2
- system/ee.py +2 -2
- system/initial_searching.py +0 -1
- system/process_time.py +1 -1
app.py
CHANGED
@@ -169,18 +169,17 @@ def run_model():
     events = df.to_dict(orient="records")
     log_entry = {
         "requested_time": timestamp,
-        "
+        "user_id": user_id,
         "pledge": claim,
         "suggestion_meta": suggestion_meta,
+        "time_start": time_start,
+        "time_end": time_end,
         "pledge_author": pledge_author,
         "pledge_date": pledge_date,
         "events": events
     }
     default_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
-
-    # if update_status:
-    #     update_status(step_id, "All done!")
-    #     step_id += 1
+
     with open(default_log_path, "w") as f:
         f.write(json.dumps(log_entry, indent=1))
 
@@ -279,62 +278,6 @@ def receive_feedback():
 
     return jsonify({'status': 'success'})
 
-# @app.route("/api/feedback", methods=["POST"])
-# def receive_feedback():
-#     data = request.get_json()
-#     pledge = data.get("pledge", "no_pledge_text")
-#     feedback_list = data.get("feedback", [])
-#     filename = data.get("file")
-#     file_path = os.path.join(TMP_DIR, filename)
-#     pledge_date = data.get("pledge_date", "")
-#     pledge_author = data.get("pledge_author", "")
-
-#     if not os.path.exists(file_path):
-#         return jsonify({"error": "Event file not found"}), 400
-
-#     with open(file_path, "r") as f:
-#         events = json.load(f)
-
-#     feedback_dict = {int(item['eventIndex']): item['answer'] for item in feedback_list}
-
-#     for idx, event in enumerate(events):
-#         event["user_feedback"] = feedback_dict.get(idx)
-
-#     log_entry = {
-#         "requested_time": data.get("timestamp"),
-#         "user_id": data.get("user_id"),
-#         "pledge": pledge,
-#         "pledge_author": pledge_author,
-#         "pledge_date": pledge_date,
-#         "events": events
-#     }
-
-#     timestamp = data.get("timestamp")
-#     user_id = data.get("user_id")
-#     timestamp = data.get("timestamp")
-
-#     if not user_id or not timestamp:
-#         return jsonify({'status': 'error', 'detail': 'Missing user_id or timestamp'}), 400
-
-#     local_filename = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
-
-#     with open(local_filename, "w") as f:
-#         f.write(json.dumps(log_entry, indent=1))
-
-#     try:
-#         api = HfApi()
-#         api.upload_file(
-#             path_or_fileobj=local_filename,
-#             path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl",
-#             repo_id=HF_DATASET_REPO,
-#             repo_type="dataset",
-#             token=HF_TOKEN
-#         )
-#     except Exception as e:
-#         return jsonify({'status': 'partial_success', 'error': str(e)}), 500
-
-#     return jsonify({'status': 'success'})
-
 @app.route("/download-feedback/<filename>")
 def download_feedback_file(filename):
     return send_from_directory(FEEDBACK_DIR, filename, as_attachment=True)
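For reference, a minimal sketch of the log entry that run_model() writes after this change. Every value below is a hypothetical placeholder; in app.py they come from the request and the pipeline run (user_id, time_start and time_end are the newly logged fields):

import json

log_entry = {
    "requested_time": "20240101T120000",   # hypothetical timestamp
    "user_id": "user_42",                  # newly logged
    "pledge": "We will plant 30 million trees a year",
    "suggestion_meta": {},
    "time_start": "20191124",              # newly logged window start
    "time_end": "20240101",                # newly logged window end
    "pledge_author": "Example Speaker",
    "pledge_date": "2019-11-24",
    "events": [],                          # df.to_dict(orient="records") output
}

# Same write pattern as the diff: one indented JSON object per feedback file.
with open("feedback_20240101T120000_user_42.jsonl", "w") as f:
    f.write(json.dumps(log_entry, indent=1))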
system/augmented_searching.py
CHANGED
@@ -8,7 +8,7 @@ from pathlib import Path
 import spacy
 
 def google_search(query, api_key, search_engine_id, start_date, end_date):
-    print(f"[SYSTEM] Calling Google Search API for: {query}")
+    # print(f"[SYSTEM] Calling Google Search API for: {query}")
     sort = f"date:r:{start_date}:{end_date}"
     url = "https://www.googleapis.com/customsearch/v1"
     params = {
@@ -56,7 +56,6 @@ def run_augmented_searching(qa_file, pipeline_base_dir, suggestion_meta, pledge_
     qa_lines = open(f"{qa_file}","r").readlines()[idx]
     qa_lines = json.loads(qa_lines)
     claim_text = f"{qa_lines['claim']}"
-    print(qa_lines)
 
 
     api_key = os.environ.get("GOOGLE_API_KEY")
@@ -79,7 +78,6 @@ def run_augmented_searching(qa_file, pipeline_base_dir, suggestion_meta, pledge_
 
 
     results = google_search(claim_text, api_key, search_engine_id, start_date, end_date)
-    print(results)
     for result in results:
         if result["link"] not in urls and "fullfact.org/government-tracker" not in result["link"]:
             string_values.append("claim")
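A sketch of how the date-restricted Custom Search request inside google_search might be issued with requests. The endpoint and the sort=date:r:START:END restriction appear in the diff; the num and timeout parameters and the "items" handling are assumptions, since the full params dict is not part of this hunk:

import requests

def google_search(query, api_key, search_engine_id, start_date, end_date):
    # Restrict results to the pledge's time window; dates are YYYYMMDD strings.
    sort = f"date:r:{start_date}:{end_date}"
    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "key": api_key,           # GOOGLE_API_KEY from the environment
        "cx": search_engine_id,   # custom search engine id
        "q": query,               # the claim text
        "sort": sort,
        "num": 10,                # assumed: up to 10 results per request
    }
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    return resp.json().get("items", [])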
system/baseline/reranking_optimized.py
CHANGED
@@ -78,10 +78,10 @@ def select_top_k(claim, results, top_k):
     top_k_sentences_urls = []
 
     i = 0
-    print(results)
+    # print(results)
     claim = remove_special_chars_except_spaces(claim).lower()
     while len(top_k_sentences_urls) < top_k and i < len(results):
-        print(i)
+        # print(i)
         sentence = remove_special_chars_except_spaces(results[i]['sentence']).lower()
 
         if sentence not in dup_check:
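The changed lines sit inside select_top_k's dedup loop. A sketch of that loop, assuming dup_check is a set and each kept result contributes its sentence and url (the body of the if branch is not shown in the hunk, and remove_special_chars_except_spaces is defined elsewhere in the module):

def select_top_k(claim, results, top_k):
    dup_check = set()
    top_k_sentences_urls = []
    i = 0
    claim = remove_special_chars_except_spaces(claim).lower()
    # Walk the ranked results and keep the first top_k unique sentences.
    while len(top_k_sentences_urls) < top_k and i < len(results):
        sentence = remove_special_chars_except_spaces(results[i]["sentence"]).lower()
        if sentence not in dup_check:
            dup_check.add(sentence)
            top_k_sentences_urls.append(
                {"sentence": results[i]["sentence"], "url": results[i].get("url")}
            )
        i += 1
    return top_k_sentences_urls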
system/ee.py
CHANGED
@@ -49,7 +49,7 @@ def run_gpt4_event_extraction(data_dir, max_tokens=100000):
 
     output_path = os.path.join(output_dir, f"gpt4o_results_{ID}_claim.json")
     if os.path.exists(output_path):
-        print(f"
+        print(f"Already exist: {output_path}")
 
     else:
 
@@ -71,7 +71,7 @@ def run_gpt4_event_extraction(data_dir, max_tokens=100000):
 
     try:
         output = gpt_4o(input_text)
-        print(f"GPT-4o Response: {output}")
+        # print(f"GPT-4o Response: {output}")
         results.append({
             "url": doc["url"],
             "title": doc["metadata"]["title"],
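The two changed lines belong to run_gpt4_event_extraction's cache-and-extract pattern: skip work when the output file already exists, otherwise call the model per document and collect results. A condensed sketch under assumed names (docs, doc["text"], the "events" key, gpt_4o passed in as a callable), not the exact function body:

import json
import os

def extract_events(docs, output_dir, ID, gpt_4o):
    output_path = os.path.join(output_dir, f"gpt4o_results_{ID}_claim.json")
    if os.path.exists(output_path):
        # Reuse the cached extraction instead of re-querying the model.
        print(f"Already exist: {output_path}")
        with open(output_path) as f:
            return json.load(f)

    results = []
    for doc in docs:
        input_text = doc["text"]            # assumed field name
        try:
            output = gpt_4o(input_text)     # model call, defined elsewhere in ee.py
            results.append({
                "url": doc["url"],
                "title": doc["metadata"]["title"],
                "events": output,           # assumed key for the model output
            })
        except Exception as e:
            print(f"Extraction failed for {doc['url']}: {e}")

    with open(output_path, "w") as f:
        json.dump(results, f, indent=1)
    return results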
system/initial_searching.py
CHANGED
@@ -11,7 +11,6 @@ import subprocess
 try:
     nlp = spacy.load("en_core_web_sm")
 except OSError:
-    print("🔁 Downloading en_core_web_sm model ...")
     subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
     nlp = spacy.load("en_core_web_sm")
 
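The removed line only dropped the progress message; the first-run download fallback itself is unchanged. An equivalent way to express the same fallback with spaCy's own download helper instead of a subprocess (a sketch, not what the repo does):

import spacy

try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Model missing on first run: fetch it, then retry the load.
    from spacy.cli import download
    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")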
system/process_time.py
CHANGED
@@ -223,7 +223,7 @@ def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggest
     else:
         event_date_and_pub_date = original_date
 
-        test_instance = f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\nEvent Summary: {event['event']} (Event Date: {original_date})\nIs this event summary useful
+        test_instance = f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\nEvent Summary: {event['event']} (Event Date: {original_date})\nIs this event summary useful to track the fulfilment of this pledge"
 
         # print(test_instance)
 
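The restored f-string is the per-event instance built for each candidate event, presumably fed to a downstream usefulness check. With hypothetical values, the rendered test_instance looks like this:

# Hypothetical values, purely to show the rendered prompt string:
pledge = "We will plant 30 million trees a year"
pledge_author = "Example Speaker"
pledge_date = "2019-11-24"
event = {"event": "Government announces a new tree-planting fund"}
original_date = "2021-05-17"

test_instance = (
    f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\n"
    f"Event Summary: {event['event']} (Event Date: {original_date})\n"
    f"Is this event summary useful to track the fulfilment of this pledge"
)
print(test_instance)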