Spaces:
Running
on
Zero
Running
on
Zero
update data loading (#1)
Browse files
- update data loading (4a6a9087b9c9112279a8d1d77794d2c80e2f4e70)
Co-authored-by: Kai <kai-aizip@users.noreply.huggingface.co>
- utils/data_loader.py +14 -2
utils/data_loader.py
CHANGED
|
@@ -5,11 +5,20 @@ import random
|
|
| 5 |
import re
|
| 6 |
from .context_processor import process_highlights
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
def load_arena_data():
|
| 9 |
"""
|
| 10 |
Loads the arena data from the arena_df.csv file in the utils directory.
|
| 11 |
Returns the data in a format compatible with the application.
|
| 12 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
try:
|
| 14 |
# Define the path to the CSV file
|
| 15 |
csv_path = os.path.join('utils', 'arena_df.csv')
|
|
@@ -17,6 +26,9 @@ def load_arena_data():
|
|
| 17 |
# Read the CSV file
|
| 18 |
df = pd.read_csv(csv_path)
|
| 19 |
print(f"Loaded arena data with {len(df)} examples")
|
|
|
|
|
|
|
|
|
|
| 20 |
return df
|
| 21 |
except Exception as e:
|
| 22 |
print(f"Error loading arena data: {e}")
|
|
@@ -39,7 +51,7 @@ def get_random_example():
|
|
| 39 |
Selects a random example from the loaded arena data.
|
| 40 |
Returns the example data in a format compatible with the application.
|
| 41 |
"""
|
| 42 |
-
#
|
| 43 |
df = load_arena_data()
|
| 44 |
|
| 45 |
if df.empty:
|
|
@@ -89,7 +101,7 @@ def get_random_example():
|
|
| 89 |
|
| 90 |
if isinstance(example['contexts_highlighted'], str):
|
| 91 |
try:
|
| 92 |
-
# Try direct parsing
|
| 93 |
raw_str = example['contexts_highlighted']
|
| 94 |
|
| 95 |
# First, manually parse the highlighted contexts using regex
|
|
|
|
| 5 |
import re
|
| 6 |
from .context_processor import process_highlights
|
| 7 |
|
| 8 |
+
# Global data store - starts as None and is cached by load_arena_data() after the first successful read
|
| 9 |
+
_ARENA_DATA = None
|
| 10 |
+
|
| 11 |
def load_arena_data():
|
| 12 |
"""
|
| 13 |
Loads the arena data from the arena_df.csv file in the utils directory.
|
| 14 |
Returns the data in a format compatible with the application.
|
| 15 |
"""
|
| 16 |
+
global _ARENA_DATA
|
| 17 |
+
|
| 18 |
+
# If data is already loaded, return it
|
| 19 |
+
if _ARENA_DATA is not None:
|
| 20 |
+
return _ARENA_DATA
|
| 21 |
+
|
| 22 |
try:
|
| 23 |
# Define the path to the CSV file
|
| 24 |
csv_path = os.path.join('utils', 'arena_df.csv')
|
|
|
|
| 26 |
# Read the CSV file
|
| 27 |
df = pd.read_csv(csv_path)
|
| 28 |
print(f"Loaded arena data with {len(df)} examples")
|
| 29 |
+
|
| 30 |
+
# Store the data globally
|
| 31 |
+
_ARENA_DATA = df
|
| 32 |
return df
|
| 33 |
except Exception as e:
|
| 34 |
print(f"Error loading arena data: {e}")
|
|
|
|
| 51 |
Selects a random example from the loaded arena data.
|
| 52 |
Returns the example data in a format compatible with the application.
|
| 53 |
"""
|
| 54 |
+
# Get the globally stored data - won't reload from disk
|
| 55 |
df = load_arena_data()
|
| 56 |
|
| 57 |
if df.empty:
|
|
|
|
| 101 |
|
| 102 |
if isinstance(example['contexts_highlighted'], str):
|
| 103 |
try:
|
| 104 |
+
# Try direct JSON parsing first
|
| 105 |
raw_str = example['contexts_highlighted']
|
| 106 |
|
| 107 |
# First, manually parse the highlighted contexts using regex
|