Spaces:
Paused
Paused
update
Browse files
app.py
CHANGED
|
@@ -5,11 +5,50 @@ import os
|
|
| 5 |
import shutil
|
| 6 |
import uuid
|
| 7 |
import glob
|
| 8 |
-
|
| 9 |
-
from
|
| 10 |
|
| 11 |
api = HfApi(token=os.environ["HF_TOKEN"])
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
scheduler = CommitScheduler(
|
| 14 |
repo_id="taesiri/zb_dataset_storage",
|
| 15 |
repo_type="dataset",
|
|
@@ -884,4 +923,8 @@ with gr.Blocks() as demo:
|
|
| 884 |
],
|
| 885 |
)
|
| 886 |
|
| 887 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
import shutil
|
| 6 |
import uuid
|
| 7 |
import glob
|
| 8 |
+
from huggingface_hub import CommitScheduler, HfApi, snapshot_download
|
| 9 |
+
from pathlib import Path
|
| 10 |
|
| 11 |
api = HfApi(token=os.environ["HF_TOKEN"])
|
| 12 |
|
| 13 |
+
|
| 14 |
+
# Download existing data from hub
|
| 15 |
+
def sync_with_hub():
    """Merge dataset entries from the hub into the local ``./data`` directory.

    Steps:
      1. If ``./data`` exists, mirror it to ``./data_backup``, replacing any
         previous backup.
      2. Download a snapshot of the dataset repo into the scratch directory
         ``hub_data``.
      3. Copy each sub-directory of the snapshot's ``data/`` folder that is
         not already present locally. Existing local directories are never
         overwritten, and loose files at the top level of ``data/`` are
         ignored.
      4. Remove the scratch directory, even if the download or copy failed.

    Errors raised by the download or by ``shutil`` are not swallowed; they
    propagate to the caller after the scratch directory has been cleaned up.
    """
    print("Starting sync with hub...")
    data_dir = Path("./data")
    backup_dir = Path("./data_backup")
    download_dir = Path("hub_data")

    if data_dir.exists():
        # Keep a copy of the current local state before touching anything.
        if backup_dir.exists():
            shutil.rmtree(backup_dir)
        shutil.copytree(data_dir, backup_dir)

    try:
        # Download latest data from hub
        repo_path = snapshot_download(
            repo_id="taesiri/zb_dataset_storage",
            repo_type="dataset",
            local_dir=download_dir,
        )

        # Merge hub data with local data
        hub_data_dir = Path(repo_path) / "data"
        if hub_data_dir.is_dir():
            data_dir.mkdir(exist_ok=True)
            for item in hub_data_dir.iterdir():
                if not item.is_dir():
                    continue
                dest = data_dir / item.name
                if not dest.exists():  # local copies win over hub copies
                    shutil.copytree(item, dest)
    finally:
        # Always drop the scratch snapshot so a failed run does not leave a
        # stale or partial download behind for the next start-up.
        if download_dir.exists():
            shutil.rmtree(download_dir)

    print("Finished syncing with hub!")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
scheduler = CommitScheduler(
|
| 53 |
repo_id="taesiri/zb_dataset_storage",
|
| 54 |
repo_type="dataset",
|
|
|
|
| 923 |
],
|
| 924 |
)
|
| 925 |
|
| 926 |
+
if __name__ == "__main__":
    # Entry point when the file is executed directly rather than imported.
    print("Initializing app...")

    # Bring the local data directory up to date before the UI is launched.
    sync_with_hub()

    print("Starting Gradio interface...")
    demo.launch()
|