Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ import json
|
|
| 12 |
import io
|
| 13 |
from tqdm import tqdm
|
| 14 |
import subprocess
|
| 15 |
-
from huggingface_hub import snapshot_download, upload_file
|
| 16 |
|
| 17 |
# Function to download a Parquet file from a specified URL
|
| 18 |
def download_parquet(url, local_path):
|
|
@@ -454,14 +454,33 @@ def generate_failed_items_str(indices):
|
|
| 454 |
|
| 455 |
# Function to upload the output file to Hugging Face
|
| 456 |
def upload_output_to_huggingface(output_file_path, repo_name, token):
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
repo_type="dataset",
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 465 |
|
| 466 |
def translate_dataset(train_url, local_parquet_path, input_file_path, output_file_path, raw_file_path, range_specification, model_type, output_dir, output_repo_name, token, translator, tokenizer):
|
| 467 |
try:
|
|
|
|
| 12 |
import io
|
| 13 |
from tqdm import tqdm
|
| 14 |
import subprocess
|
| 15 |
+
from huggingface_hub import snapshot_download, upload_file, HfApi, create_repo
|
| 16 |
|
| 17 |
# Function to download a Parquet file from a specified URL
|
| 18 |
def download_parquet(url, local_path):
|
|
|
|
| 454 |
|
| 455 |
# Function to upload the output file to Hugging Face
|
| 456 |
def _is_http_not_found(error):
    """Return True when *error* carries an HTTP response whose status is 404.

    Inspecting the status code is more reliable than searching the exception
    text for "404", which can also appear in URLs, request IDs or repo names.
    """
    response = getattr(error, "response", None)
    return getattr(response, "status_code", None) == 404


def upload_output_to_huggingface(output_file_path, repo_name, token):
    """Upload a local file to a Hugging Face dataset repository.

    The dataset repository is looked up first and created when the Hub
    answers with HTTP 404. Any other lookup failure is reported on stdout
    and the upload is skipped.

    Args:
        output_file_path: Local path of the file to upload. The same string
            is used as the destination path inside the repository.
        repo_name: Repository id passed to the Hub API as ``repo_id``.
        token: Access token passed to every Hub API call.

    Returns:
        None.

    Raises:
        Exception: Whatever ``create_repo`` or ``upload_file`` raises; upload
            errors are printed and then re-raised unchanged.
    """
    api = HfApi()

    # Check if the repository exists
    try:
        api.repo_info(repo_id=repo_name, repo_type="dataset", token=token)
    except Exception as e:
        if not _is_http_not_found(e):
            print(f"Failed to check repository existence: {e}")
            return
        # Create the repository if it doesn't exist. exist_ok=True keeps this
        # safe if another process creates it between the check and this call.
        create_repo(
            repo_id=repo_name,
            repo_type="dataset",
            token=token,
            exist_ok=True,
        )
        print(f"Created repository: {repo_name}")

    # Upload the file to the repository
    try:
        upload_file(
            path_or_fileobj=output_file_path,
            # NOTE(review): the local path is reused verbatim as the in-repo
            # path; confirm callers never pass an absolute path here.
            path_in_repo=output_file_path,
            repo_id=repo_name,
            repo_type="dataset",
            token=token,
        )
        print(f"Uploaded {output_file_path} to Hugging Face repository: {repo_name}")
    except Exception as e:
        print(f"Failed to upload {output_file_path} to Hugging Face: {e}")
        raise
|
| 484 |
|
| 485 |
def translate_dataset(train_url, local_parquet_path, input_file_path, output_file_path, raw_file_path, range_specification, model_type, output_dir, output_repo_name, token, translator, tokenizer):
|
| 486 |
try:
|