Spaces:

peihsin
/

RS-AAAI

Sleeping

App Files Files Community

peihsin0715 committed on Sep 10

Commit

3ca6cb8

1 Parent(s): 97a31eb

Fix data saving

Browse files

Files changed (3) hide show

backend/server.py +10 -10
backend/utils/finetune.py +1 -1
backend/utils/utils.py +1 -1

backend/server.py CHANGED Viewed

@@ -54,7 +54,7 @@ _MODELS = {}
 _CURRENT_DATASET = None
 _GENERATION_RESULTS = None
-@app.route('/data/<path:filename>')
 def serve_data(filename):
     import os
     from flask import Response
@@ -419,7 +419,7 @@ def run_pipeline():
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         os.makedirs("data", exist_ok=True)
-        output_file = f"data/pipeline_generation_{timestamp}.csv"
         evaluated_results.to_csv(output_file, index=False)
         results['generation_file'] = output_file
         results['generation_samples'] = len(evaluated_results)
@@ -431,7 +431,7 @@ def run_pipeline():
             category_col="category",
             num_cf_per_row=num_cf_per_row
         )
-        augmented_file = f"data/pipeline_generation_cf_augmented_{timestamp}.csv"
         augmented_results.to_csv(augmented_file, index=False)
         results['counterfactual_file'] = augmented_file
         results['counterfactual_added'] = len(augmented_results) - len(evaluated_results)
@@ -465,7 +465,7 @@ def run_pipeline():
             mid_point = len(evaluated_results) // 2
             best_sent_subset = evaluated_results.iloc[:mid_point].copy()
-        sent_file = f"data/pipeline_sent_subset_{timestamp}.csv"
         best_sent_subset.to_csv(sent_file, index=False)
         print(f"Pipeline Step 5: Rank sampling on CF-augmented results...(iterations={iterations}, temp={tau})")
@@ -476,7 +476,7 @@ def run_pipeline():
             mid_point = len(augmented_results) // 2
             cf_best_sent_subset = augmented_results.iloc[:mid_point].copy()
-        cf_sent_file = f"data/pipeline_cf_sent_subset_{timestamp}.csv"
         cf_best_sent_subset.to_csv(cf_sent_file, index=False)
         orig_means = _mean_by_cat(best_sent_subset, selected_groups_used)
@@ -519,8 +519,8 @@ def run_pipeline():
         print("[Plot check exists]", cf_path,   os.path.exists(cf_path))
         results['plots'] = {
-            'original_sentiment': f"/data/{orig_sent_title}.png",
-            'counterfactual_sentiment': f"/data/{cf_sent_title}.png",
         }
         print("[Plot urls]", results['plots'])
@@ -534,7 +534,7 @@ def run_pipeline():
             lr = float(ft_cfg.get("learningRate", 5e-5))
             input_csv = augmented_file
-            ft_output_dir = f"data/ft_{timestamp}"
             os.makedirs(ft_output_dir, exist_ok=True)
             try:
@@ -548,10 +548,10 @@ def run_pipeline():
                 )
                 print(f"[Fine-tune] Saved fine-tuned model to {ft_output_dir}")
                 results["finetuned_model_dir"] = ft_output_dir
-                zip_base = f"data/ft_{timestamp}"
                 import shutil
                 zip_path = shutil.make_archive(zip_base, 'zip', ft_output_dir)
-                results["finetuned_model_zip"] = f"/data/{os.path.basename(zip_path)}"
             except Exception as fe:
                 print(f"[Fine-tune] Failed: {fe}")
                 results["finetuned_model_error"] = str(fe)

 _CURRENT_DATASET = None
 _GENERATION_RESULTS = None
+@app.route('/tmp/<path:filename>')
 def serve_data(filename):
     import os
     from flask import Response
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         os.makedirs("data", exist_ok=True)
+        output_file = f"/tmp/pipeline_generation_{timestamp}.csv"
         evaluated_results.to_csv(output_file, index=False)
         results['generation_file'] = output_file
         results['generation_samples'] = len(evaluated_results)
             category_col="category",
             num_cf_per_row=num_cf_per_row
         )
+        augmented_file = f"/tmp/pipeline_generation_cf_augmented_{timestamp}.csv"
         augmented_results.to_csv(augmented_file, index=False)
         results['counterfactual_file'] = augmented_file
         results['counterfactual_added'] = len(augmented_results) - len(evaluated_results)
             mid_point = len(evaluated_results) // 2
             best_sent_subset = evaluated_results.iloc[:mid_point].copy()
+        sent_file = f"/tmp/pipeline_sent_subset_{timestamp}.csv"
         best_sent_subset.to_csv(sent_file, index=False)
         print(f"Pipeline Step 5: Rank sampling on CF-augmented results...(iterations={iterations}, temp={tau})")
             mid_point = len(augmented_results) // 2
             cf_best_sent_subset = augmented_results.iloc[:mid_point].copy()
+        cf_sent_file = f"/tmp/pipeline_cf_sent_subset_{timestamp}.csv"
         cf_best_sent_subset.to_csv(cf_sent_file, index=False)
         orig_means = _mean_by_cat(best_sent_subset, selected_groups_used)
         print("[Plot check exists]", cf_path,   os.path.exists(cf_path))
         results['plots'] = {
+            'original_sentiment': f"/tmp/{orig_sent_title}.png",
+            'counterfactual_sentiment': f"/tmp/{cf_sent_title}.png",
         }
         print("[Plot urls]", results['plots'])
             lr = float(ft_cfg.get("learningRate", 5e-5))
             input_csv = augmented_file
+            ft_output_dir = f"/tmp/ft_{timestamp}"
             os.makedirs(ft_output_dir, exist_ok=True)
             try:
                 )
                 print(f"[Fine-tune] Saved fine-tuned model to {ft_output_dir}")
                 results["finetuned_model_dir"] = ft_output_dir
+                zip_base = f"/tmp/ft_{timestamp}"
                 import shutil
                 zip_path = shutil.make_archive(zip_base, 'zip', ft_output_dir)
+                results["finetuned_model_zip"] = f"/tmp/{os.path.basename(zip_path)}"
             except Exception as fe:
                 print(f"[Fine-tune] Failed: {fe}")
                 results["finetuned_model_error"] = str(fe)

backend/utils/finetune.py CHANGED Viewed

@@ -28,7 +28,7 @@ def build_text_column(df: pd.DataFrame) -> pd.Series:
 def finetune_gpt2_from_csv(
     csv_path: str,
     base_model: str = "gpt2",
-    output_dir: str = "data/ft_gpt2_out",
     train_split: float = 0.9,
     epochs: int = 3,
     lr: float = 5e-5,

 def finetune_gpt2_from_csv(
     csv_path: str,
     base_model: str = "gpt2",
+    output_dir: str = "/tmp/ft_gpt2_out",
     train_split: float = 0.9,
     epochs: int = 3,
     lr: float = 5e-5,

backend/utils/utils.py CHANGED Viewed

@@ -103,7 +103,7 @@ def load_model_and_tokenizer(model_name: str):
         raise RuntimeError(f"Failed to load model '{model_name}': {e}")
 def finetune(train_texts, tokenizer, model, num_epochs=20, output_dir='./data'):
-    train_path = f"data/train.txt"
     with open(train_path, "w", encoding="utf-8") as f:
         for text in train_texts:

         raise RuntimeError(f"Failed to load model '{model_name}': {e}")
 def finetune(train_texts, tokenizer, model, num_epochs=20, output_dir='./data'):
+    train_path = f"/tmp/train.txt"
     with open(train_path, "w", encoding="utf-8") as f:
         for text in train_texts: