Simon Strandgaard
committed on
Commit
·
977ad93
1
Parent(s):
7247691
purge_old_runs() now takes a prefix parameter, to be more certain that the data being deleted is really intended to be deleted.
Browse files
src/purge/purge_old_runs.py
CHANGED
|
@@ -7,9 +7,9 @@ import time
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
-
def purge_old_runs(run_dir: str, max_age_hours: float = 1.0) -> None:
|
| 11 |
"""
|
| 12 |
-
Deletes runs in the specified run_dir older than max_age_hours.
|
| 13 |
"""
|
| 14 |
if not os.path.isabs(run_dir):
|
| 15 |
raise ValueError(f"run_dir must be an absolute path: {run_dir}")
|
|
@@ -18,6 +18,9 @@ def purge_old_runs(run_dir: str, max_age_hours: float = 1.0) -> None:
|
|
| 18 |
cutoff = now - datetime.timedelta(hours=max_age_hours)
|
| 19 |
|
| 20 |
for run_id in os.listdir(run_dir):
|
|
|
|
|
|
|
|
|
|
| 21 |
run_path = os.path.join(run_dir, run_id)
|
| 22 |
if not os.path.isdir(run_path):
|
| 23 |
continue # Skip files
|
|
@@ -35,7 +38,7 @@ def purge_old_runs(run_dir: str, max_age_hours: float = 1.0) -> None:
|
|
| 35 |
except Exception as e:
|
| 36 |
logger.error(f"Error processing {run_id} in {run_dir}: {e}")
|
| 37 |
|
| 38 |
-
def start_purge_scheduler(run_dir: str, purge_interval_seconds: float=3600) -> None:
|
| 39 |
"""
|
| 40 |
Start the purge scheduler in a background thread.
|
| 41 |
"""
|
|
@@ -51,7 +54,7 @@ def start_purge_scheduler(run_dir: str, purge_interval_seconds: float=3600) -> N
|
|
| 51 |
while True:
|
| 52 |
logger.info("Running purge...")
|
| 53 |
print("Running purge...")
|
| 54 |
-
purge_old_runs(run_dir)
|
| 55 |
time.sleep(purge_interval_seconds)
|
| 56 |
|
| 57 |
purge_thread = threading.Thread(target=purge_scheduler, daemon=True)
|
|
|
|
| 7 |
|
| 8 |
logger = logging.getLogger(__name__)
|
| 9 |
|
| 10 |
+
def purge_old_runs(run_dir: str, max_age_hours: float = 1.0, prefix: str = "myrun_") -> None:
|
| 11 |
"""
|
| 12 |
+
Deletes runs in the specified run_dir older than max_age_hours and matching the specified prefix.
|
| 13 |
"""
|
| 14 |
if not os.path.isabs(run_dir):
|
| 15 |
raise ValueError(f"run_dir must be an absolute path: {run_dir}")
|
|
|
|
| 18 |
cutoff = now - datetime.timedelta(hours=max_age_hours)
|
| 19 |
|
| 20 |
for run_id in os.listdir(run_dir):
|
| 21 |
+
if not run_id.startswith(prefix):
|
| 22 |
+
continue # Skip files and directories that don't match the prefix
|
| 23 |
+
|
| 24 |
run_path = os.path.join(run_dir, run_id)
|
| 25 |
if not os.path.isdir(run_path):
|
| 26 |
continue # Skip files
|
|
|
|
| 38 |
except Exception as e:
|
| 39 |
logger.error(f"Error processing {run_id} in {run_dir}: {e}")
|
| 40 |
|
| 41 |
+
def start_purge_scheduler(run_dir: str, purge_interval_seconds: float=3600, prefix: str = "myrun_") -> None:
|
| 42 |
"""
|
| 43 |
Start the purge scheduler in a background thread.
|
| 44 |
"""
|
|
|
|
| 54 |
while True:
|
| 55 |
logger.info("Running purge...")
|
| 56 |
print("Running purge...")
|
| 57 |
+
purge_old_runs(run_dir, prefix=prefix)
|
| 58 |
time.sleep(purge_interval_seconds)
|
| 59 |
|
| 60 |
purge_thread = threading.Thread(target=purge_scheduler, daemon=True)
|
src/purge/tests/test_purge_old_runs.py
CHANGED
|
@@ -6,9 +6,7 @@ from src.purge.purge_old_runs import purge_old_runs
|
|
| 6 |
|
| 7 |
class TestPurgeOldRuns(unittest.TestCase):
|
| 8 |
def setUp(self):
|
| 9 |
-
"""
|
| 10 |
-
Set up test environment before each test.
|
| 11 |
-
"""
|
| 12 |
# Create a temporary directory for the runs
|
| 13 |
self.test_run_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "test_run"))
|
| 14 |
if os.path.exists(self.test_run_dir):
|
|
@@ -16,26 +14,23 @@ class TestPurgeOldRuns(unittest.TestCase):
|
|
| 16 |
os.makedirs(self.test_run_dir, exist_ok=True)
|
| 17 |
|
| 18 |
# Create some dummy run directories with different modification times
|
| 19 |
-
self.create_dummy_run("
|
| 20 |
-
self.create_dummy_run("
|
| 21 |
-
self.create_dummy_run("
|
| 22 |
-
self.create_dummy_run("
|
| 23 |
-
self.create_dummy_run("
|
| 24 |
-
self.create_dummy_run("
|
|
|
|
| 25 |
self.create_dummy_file("not_a_run.txt") # a file that should be left alone
|
| 26 |
|
| 27 |
def tearDown(self):
|
| 28 |
-
"""
|
| 29 |
-
Clean up test environment after each test.
|
| 30 |
-
"""
|
| 31 |
# Remove the temporary run directory and its contents
|
| 32 |
if os.path.exists(self.test_run_dir):
|
| 33 |
shutil.rmtree(self.test_run_dir)
|
| 34 |
|
| 35 |
def create_dummy_run(self, run_id, hours_old):
|
| 36 |
-
"""
|
| 37 |
-
Creates a dummy run directory with a specific modification time.
|
| 38 |
-
"""
|
| 39 |
run_path = os.path.join(self.test_run_dir, run_id)
|
| 40 |
os.makedirs(run_path, exist_ok=True)
|
| 41 |
|
|
@@ -52,11 +47,11 @@ class TestPurgeOldRuns(unittest.TestCase):
|
|
| 52 |
def test_purge_old_runs(self):
|
| 53 |
"""Tests the purge_old_runs function."""
|
| 54 |
max_age_hours = 0.95
|
| 55 |
-
purge_old_runs(self.test_run_dir, max_age_hours=max_age_hours) # Pass the directory
|
| 56 |
|
| 57 |
# Check which runs should have been purged
|
| 58 |
-
runs_to_keep = ["
|
| 59 |
-
runs_to_purge = ["
|
| 60 |
|
| 61 |
for run_id in runs_to_keep:
|
| 62 |
run_path = os.path.join(self.test_run_dir, run_id)
|
|
@@ -76,10 +71,10 @@ class TestPurgeOldRuns(unittest.TestCase):
|
|
| 76 |
os.utime(item_path, (mtime, mtime))
|
| 77 |
|
| 78 |
max_age_hours = 1.0
|
| 79 |
-
purge_old_runs(self.test_run_dir, max_age_hours=max_age_hours) # Pass the directory
|
| 80 |
|
| 81 |
-
# All runs should still exist.
|
| 82 |
-
expected_runs = ["
|
| 83 |
for run_id in expected_runs:
|
| 84 |
run_path = os.path.join(self.test_run_dir, run_id)
|
| 85 |
self.assertTrue(os.path.exists(run_path), f"Run {run_id} should not have been purged.")
|
|
|
|
| 6 |
|
| 7 |
class TestPurgeOldRuns(unittest.TestCase):
|
| 8 |
def setUp(self):
|
| 9 |
+
"""Set up test environment before each test."""
|
|
|
|
|
|
|
| 10 |
# Create a temporary directory for the runs
|
| 11 |
self.test_run_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "test_run"))
|
| 12 |
if os.path.exists(self.test_run_dir):
|
|
|
|
| 14 |
os.makedirs(self.test_run_dir, exist_ok=True)
|
| 15 |
|
| 16 |
# Create some dummy run directories with different modification times
|
| 17 |
+
self.create_dummy_run("myrun_run1", hours_old=0.5)
|
| 18 |
+
self.create_dummy_run("myrun_run2", hours_old=1.5) # Should be purged
|
| 19 |
+
self.create_dummy_run("myrun_run3", hours_old=2) # Should be purged
|
| 20 |
+
self.create_dummy_run("myrun_run4", hours_old=0.25)
|
| 21 |
+
self.create_dummy_run("myrun_run5", hours_old=1) # Boundary condition, might be purged
|
| 22 |
+
self.create_dummy_run("myrun_run6", hours_old=0) # Today
|
| 23 |
+
self.create_dummy_run("other_run7", hours_old=1.5) #Should NOT be purged
|
| 24 |
self.create_dummy_file("not_a_run.txt") # a file that should be left alone
|
| 25 |
|
| 26 |
def tearDown(self):
|
| 27 |
+
"""Clean up test environment after each test."""
|
|
|
|
|
|
|
| 28 |
# Remove the temporary run directory and its contents
|
| 29 |
if os.path.exists(self.test_run_dir):
|
| 30 |
shutil.rmtree(self.test_run_dir)
|
| 31 |
|
| 32 |
def create_dummy_run(self, run_id, hours_old):
|
| 33 |
+
"""Creates a dummy run directory with a specific modification time."""
|
|
|
|
|
|
|
| 34 |
run_path = os.path.join(self.test_run_dir, run_id)
|
| 35 |
os.makedirs(run_path, exist_ok=True)
|
| 36 |
|
|
|
|
| 47 |
def test_purge_old_runs(self):
|
| 48 |
"""Tests the purge_old_runs function."""
|
| 49 |
max_age_hours = 0.95
|
| 50 |
+
purge_old_runs(self.test_run_dir, max_age_hours=max_age_hours, prefix="myrun_") # Pass the directory
|
| 51 |
|
| 52 |
# Check which runs should have been purged
|
| 53 |
+
runs_to_keep = ["myrun_run1", "myrun_run4", "myrun_run6", "other_run7","not_a_run.txt"]
|
| 54 |
+
runs_to_purge = ["myrun_run2", "myrun_run3", "myrun_run5"]
|
| 55 |
|
| 56 |
for run_id in runs_to_keep:
|
| 57 |
run_path = os.path.join(self.test_run_dir, run_id)
|
|
|
|
| 71 |
os.utime(item_path, (mtime, mtime))
|
| 72 |
|
| 73 |
max_age_hours = 1.0
|
| 74 |
+
purge_old_runs(self.test_run_dir, max_age_hours=max_age_hours, prefix="myrun_") # Pass the directory
|
| 75 |
|
| 76 |
+
# All runs should still exist, including the one with the wrong prefix.
|
| 77 |
+
expected_runs = ["myrun_run1", "myrun_run2", "myrun_run3", "myrun_run4", "myrun_run5", "myrun_run6", "other_run7", "not_a_run.txt"]
|
| 78 |
for run_id in expected_runs:
|
| 79 |
run_path = os.path.join(self.test_run_dir, run_id)
|
| 80 |
self.assertTrue(os.path.exists(run_path), f"Run {run_id} should not have been purged.")
|