Spaces:

victor
/

websearch

Running

App Files Files Community

websearch / analytics.py

victor HF Staff

Only track average request time for default num_results (4)

7de855a 4 months ago

raw

history blame contribute delete

3.89 kB

	# ─── analytics.py ──────────────────────────────────────────────────────────────
	import os
	import json
	from datetime import datetime, timedelta, timezone
	from filelock import FileLock # pip install filelock
	import pandas as pd # already available in HF images

	# Pick the directory that holds the analytics files. First match wins:
	#   1. ANALYTICS_DATA_DIR from the environment, when set to a non-empty value
	#   2. /data, when it exists and is writable (Hugging Face Spaces with
	#      persistent storage)
	#   3. ./data, for local development
	DATA_DIR = os.environ.get("ANALYTICS_DATA_DIR")
	if not DATA_DIR and os.path.exists("/data") and os.access("/data", os.W_OK):
	    DATA_DIR = "/data"
	    print("[Analytics] Using persistent storage at /data")
	elif not DATA_DIR:
	    DATA_DIR = "./data"
	    print("[Analytics] Using local storage at ./data")

	# Make sure the directory exists before any file inside it is opened.
	os.makedirs(DATA_DIR, exist_ok=True)

	# Per-day request counters, per-day duration samples, and the lock file that
	# serialises access to both.
	COUNTS_FILE = os.path.join(DATA_DIR, "request_counts.json")
	TIMES_FILE = os.path.join(DATA_DIR, "request_times.json")
	LOCK_FILE = os.path.join(DATA_DIR, "analytics.lock")

	def _load() -> dict:
	    """Return the JSON content of COUNTS_FILE, or {} when the file does not exist.

	    Record_request stores a mapping of "YYYY-MM-DD" date strings to integer
	    request counts in this file.
	    """
	    if os.path.exists(COUNTS_FILE):
	        with open(COUNTS_FILE) as fh:
	            return json.load(fh)
	    # Nothing has been recorded yet.
	    return {}

	def _save(data: dict):
	    """Write the request-count mapping to COUNTS_FILE as JSON.

	    The payload is written to a sibling temporary file and then moved over
	    COUNTS_FILE with os.replace. Opening COUNTS_FILE itself with mode "w"
	    truncates it immediately, so an interrupted or failed write (crash,
	    full disk, value json.dump cannot encode) would leave a file that
	    json.load can no longer parse; with the temp-file approach the previous
	    content stays intact until the new content is complete.

	    Args:
	        data: Mapping to serialise; must be encodable by json.dump.

	    Raises:
	        TypeError: If data contains a value json.dump cannot encode.
	        OSError: If the temporary file cannot be written or moved.
	    """
	    # A fixed temp name is enough here: record_request calls this while
	    # holding FileLock(LOCK_FILE), so writers do not overlap.
	    tmp_path = COUNTS_FILE + ".tmp"
	    with open(tmp_path, "w") as f:
	        json.dump(data, f)
	    # Same directory as the target, so the move stays on one filesystem.
	    os.replace(tmp_path, COUNTS_FILE)

	def _load_times() -> dict:
	    """Return the JSON content of TIMES_FILE, or {} when the file does not exist.

	    Record_request stores a mapping of "YYYY-MM-DD" date strings to lists of
	    request durations in this file.
	    """
	    if os.path.exists(TIMES_FILE):
	        with open(TIMES_FILE) as fh:
	            return json.load(fh)
	    # No durations have been recorded yet.
	    return {}

	def _save_times(data: dict):
	    """Write the request-duration mapping to TIMES_FILE as JSON.

	    The payload is written to a sibling temporary file and then moved over
	    TIMES_FILE with os.replace. Opening TIMES_FILE itself with mode "w"
	    truncates it immediately, so an interrupted or failed write (crash,
	    full disk, value json.dump cannot encode) would leave a file that
	    json.load can no longer parse; with the temp-file approach the previous
	    content stays intact until the new content is complete.

	    Args:
	        data: Mapping to serialise; must be encodable by json.dump.

	    Raises:
	        TypeError: If data contains a value json.dump cannot encode.
	        OSError: If the temporary file cannot be written or moved.
	    """
	    # A fixed temp name is enough here: record_request calls this while
	    # holding FileLock(LOCK_FILE), so writers do not overlap.
	    tmp_path = TIMES_FILE + ".tmp"
	    with open(tmp_path, "w") as f:
	        json.dump(data, f)
	    # Same directory as the target, so the move stays on one filesystem.
	    os.replace(tmp_path, TIMES_FILE)

	async def record_request(duration: float = None, num_results: int = None) -> None:
	    """Count one request for today (UTC) and optionally store its duration.

	    Both files are updated while holding FileLock(LOCK_FILE). The duration,
	    rounded to two decimals, is appended to today's samples only when it is
	    given and the request used the default result count (num_results is
	    None or 4).

	    NOTE(review): the body never awaits; the lock and the file I/O are
	    synchronous, so the event loop is blocked while this coroutine runs.

	    Args:
	        duration: Request duration to record, or None to record only the count.
	        num_results: Number of results the request asked for, if known.
	    """
	    day_key = datetime.now(timezone.utc).strftime("%Y-%m-%d")
	    with FileLock(LOCK_FILE):
	        # Bump today's request counter.
	        counts = _load()
	        counts[day_key] = counts.get(day_key, 0) + 1
	        _save(counts)

	        # Durations are tracked only for default requests (num_results=4).
	        if duration is None:
	            return
	        if num_results is not None and num_results != 4:
	            return

	        timings = _load_times()
	        timings.setdefault(day_key, []).append(round(duration, 2))
	        _save_times(timings)

	def last_n_days_df(n: int = 30) -> pd.DataFrame:
	    """Return a DataFrame with one row per day for the past n days (UTC).

	    Rows run from the oldest day to today. Days with no stored counter get
	    a count of 0.

	    Args:
	        n: Number of days to cover, counting today. For n <= 0 the result
	            has no rows but still carries the three columns below.

	    Returns:
	        DataFrame with columns "date" (display label, e.g. "Jan 05"),
	        "count" (requests recorded for that day) and "full_date"
	        ("YYYY-MM-DD").
	    """
	    now = datetime.now(timezone.utc)
	    # Hold the lock only for the file read; the rows are built from the
	    # in-memory copy.
	    with FileLock(LOCK_FILE):
	        data = _load()

	    records = []
	    for days_back in range(n - 1, -1, -1):
	        day = now - timedelta(days=days_back)
	        day_str = day.strftime("%Y-%m-%d")
	        records.append({
	            "date": day.strftime("%b %d"),  # Display label (MMM DD)
	            "count": data.get(day_str, 0),
	            "full_date": day_str,  # Keep full date for tooltip
	        })

	    # Explicit columns keep the schema stable when there are no rows;
	    # without them an empty record list produces a column-less frame.
	    return pd.DataFrame(records, columns=["date", "count", "full_date"])

	def last_n_days_avg_time_df(n: int = 30) -> pd.DataFrame:
	    """Return a DataFrame with the average request time per day for the past n days (UTC).

	    Rows run from the oldest day to today. Days with no stored durations
	    get an average of 0 and a request count of 0.

	    Args:
	        n: Number of days to cover, counting today. For n <= 0 the result
	            has no rows but still carries the four columns below.

	    Returns:
	        DataFrame with columns "date" (display label, e.g. "Jan 05"),
	        "avg_time" (mean of that day's stored durations, rounded to two
	        decimals), "request_count" (number of stored durations for that
	        day) and "full_date" ("YYYY-MM-DD").
	    """
	    now = datetime.now(timezone.utc)
	    # Hold the lock only for the file read; the rows are built from the
	    # in-memory copy.
	    with FileLock(LOCK_FILE):
	        times = _load_times()

	    records = []
	    for days_back in range(n - 1, -1, -1):
	        day = now - timedelta(days=days_back)
	        day_str = day.strftime("%Y-%m-%d")

	        # Average the day's samples; a day without samples reports 0.
	        day_times = times.get(day_str, [])
	        avg_time = round(sum(day_times) / len(day_times), 2) if day_times else 0

	        records.append({
	            "date": day.strftime("%b %d"),  # Display label (MMM DD)
	            "avg_time": avg_time,
	            "request_count": len(day_times),
	            "full_date": day_str,  # Keep full date for tooltip
	        })

	    # Explicit columns keep the schema stable when there are no rows;
	    # without them an empty record list produces a column-less frame.
	    return pd.DataFrame(
	        records,
	        columns=["date", "avg_time", "request_count", "full_date"],
	    )