my-gradio-app / data_mining /vn_food_db.py
Nguyen Trong Lap
Recreate history without binary blobs
eeb0f9c
#!/usr/bin/env python3
"""
Create Vietnamese Food Nutrition Database
Generates a CSV of common Vietnamese foods (currently 73 entries) with their nutrition facts
"""
import csv
import sys
from collections import Counter
from pathlib import Path
def vn_food_db(output_dir=None) -> Path:
    """Write the Vietnamese food nutrition table to a CSV file and print a summary.

    The table is embedded in this function. Each row holds a Vietnamese name,
    an English name, calories, protein/carbs/fat/fiber in grams, and a
    category label.

    NOTE(review): the serving size the figures refer to is not stated
    anywhere in this file -- confirm before using the numbers downstream.

    Args:
        output_dir: Directory that receives ``vietnamese_food_nutrition.csv``
            (a ``str`` or ``Path``). When omitted, the file goes to
            ``data_mining/datasets`` relative to the current working
            directory. Missing directories are created.

    Returns:
        Path of the CSV file that was written.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # Column order shared by the CSV header and every data row below.
    columns = [
        'name_vi', 'name_en', 'calories', 'protein_g',
        'carbs_g', 'fat_g', 'fiber_g', 'category'
    ]
    category_index = columns.index('category')

    # Vietnamese food nutrition data, one row per food, in `columns` order.
    foods = [
        # Pho & noodle soups
        ["Phở bò", "Beef Pho", 450, 20, 60, 15, 2, "Noodle Soup"],
        ["Phở gà", "Chicken Pho", 380, 18, 55, 10, 2, "Noodle Soup"],
        ["Phở tái", "Rare Beef Pho", 420, 19, 58, 12, 2, "Noodle Soup"],
        ["Phở chín", "Well-done Beef Pho", 460, 21, 60, 16, 2, "Noodle Soup"],
        ["Bún bò Huế", "Hue Beef Noodle", 500, 22, 65, 18, 3, "Noodle Soup"],
        ["Bún riêu", "Crab Noodle Soup", 420, 18, 58, 14, 3, "Noodle Soup"],
        ["Bún chả cá", "Fish Cake Noodle", 380, 20, 52, 12, 2, "Noodle Soup"],
        ["Hủ tiếu", "Hu Tieu Noodle", 400, 16, 60, 10, 2, "Noodle Soup"],
        ["Mì Quảng", "Quang Noodle", 450, 20, 58, 15, 3, "Noodle Soup"],
        ["Cao lầu", "Cao Lau Noodle", 480, 18, 62, 16, 2, "Noodle Soup"],
        # Vermicelli dishes
        ["Bún chả", "Grilled Pork Vermicelli", 550, 20, 70, 20, 2, "Vermicelli"],
        ["Bún thịt nướng", "Grilled Pork Vermicelli", 520, 22, 68, 18, 2, "Vermicelli"],
        ["Bún bò xào", "Stir-fried Beef Vermicelli", 480, 20, 65, 15, 3, "Vermicelli"],
        ["Bún gà nướng", "Grilled Chicken Vermicelli", 450, 24, 62, 12, 2, "Vermicelli"],
        ["Bún nem nướng", "Grilled Pork Patty Vermicelli", 500, 18, 66, 16, 2, "Vermicelli"],
        # Rice dishes
        ["Cơm tấm", "Broken Rice", 600, 25, 80, 20, 2, "Rice"],
        ["Cơm sườn", "Pork Chop Rice", 650, 28, 85, 22, 2, "Rice"],
        ["Cơm gà", "Chicken Rice", 550, 30, 75, 15, 2, "Rice"],
        ["Cơm chiên", "Fried Rice", 580, 15, 78, 22, 2, "Rice"],
        ["Cơm rang dương châu", "Yang Chow Fried Rice", 620, 18, 82, 24, 2, "Rice"],
        ["Cơm hến", "Clam Rice", 480, 20, 70, 12, 3, "Rice"],
        ["Cơm trắng", "White Rice", 200, 4, 45, 0.5, 1, "Rice"],
        # Banh mi (Vietnamese sandwich)
        ["Bánh mì thịt", "Pork Banh Mi", 400, 12, 50, 18, 3, "Bread"],
        ["Bánh mì gà", "Chicken Banh Mi", 380, 14, 48, 15, 3, "Bread"],
        ["Bánh mì pate", "Pate Banh Mi", 420, 10, 52, 20, 2, "Bread"],
        ["Bánh mì chả", "Sausage Banh Mi", 390, 13, 49, 17, 3, "Bread"],
        ["Bánh mì trứng", "Egg Banh Mi", 350, 12, 45, 14, 2, "Bread"],
        # Spring rolls
        ["Gỏi cuốn", "Fresh Spring Rolls", 150, 8, 20, 5, 2, "Appetizer"],
        ["Nem rán", "Fried Spring Rolls", 250, 10, 25, 15, 1, "Appetizer"],
        ["Chả giò", "Fried Rolls", 280, 12, 28, 16, 1, "Appetizer"],
        ["Nem nướng", "Grilled Pork Patty", 200, 15, 10, 12, 1, "Appetizer"],
        # Cakes & pancakes
        ["Bánh xèo", "Vietnamese Pancake", 350, 12, 40, 18, 2, "Pancake"],
        ["Bánh cuốn", "Steamed Rice Rolls", 180, 8, 28, 6, 1, "Pancake"],
        ["Bánh bột lọc", "Tapioca Dumplings", 200, 6, 35, 5, 1, "Pancake"],
        ["Bánh bèo", "Water Fern Cake", 120, 4, 22, 3, 1, "Pancake"],
        ["Bánh khọt", "Mini Pancakes", 280, 8, 32, 14, 2, "Pancake"],
        # Sticky rice
        ["Xôi gà", "Chicken Sticky Rice", 450, 18, 70, 12, 2, "Sticky Rice"],
        ["Xôi thịt", "Pork Sticky Rice", 480, 16, 72, 14, 2, "Sticky Rice"],
        ["Xôi xéo", "Mung Bean Sticky Rice", 400, 12, 68, 10, 3, "Sticky Rice"],
        ["Xôi lạc", "Peanut Sticky Rice", 420, 14, 65, 13, 3, "Sticky Rice"],
        # Soups
        ["Canh chua", "Sour Soup", 180, 12, 15, 8, 3, "Soup"],
        ["Canh rau", "Vegetable Soup", 80, 3, 12, 2, 3, "Soup"],
        ["Canh cá", "Fish Soup", 150, 15, 10, 6, 2, "Soup"],
        ["Lẩu", "Hot Pot", 400, 25, 30, 20, 4, "Soup"],
        # Seafood
        ["Cá kho tộ", "Braised Fish", 280, 25, 8, 18, 1, "Seafood"],
        ["Tôm rang", "Stir-fried Shrimp", 200, 20, 5, 10, 1, "Seafood"],
        ["Mực xào", "Stir-fried Squid", 180, 18, 8, 8, 1, "Seafood"],
        ["Cua rang me", "Tamarind Crab", 220, 16, 12, 12, 1, "Seafood"],
        # Meat dishes
        ["Thịt kho", "Braised Pork", 350, 20, 10, 25, 1, "Meat"],
        ["Sườn nướng", "Grilled Pork Ribs", 400, 22, 8, 30, 1, "Meat"],
        ["Gà nướng", "Grilled Chicken", 280, 28, 5, 15, 0, "Meat"],
        ["Bò lúc lắc", "Shaking Beef", 320, 25, 8, 20, 1, "Meat"],
        # Vegetables
        ["Rau muống xào", "Stir-fried Water Spinach", 60, 3, 8, 2, 2, "Vegetable"],
        ["Cải xào", "Stir-fried Bok Choy", 50, 2, 7, 2, 2, "Vegetable"],
        ["Đậu que xào", "Stir-fried Green Beans", 70, 3, 10, 2, 3, "Vegetable"],
        ["Bí xanh xào", "Stir-fried Zucchini", 55, 2, 8, 2, 2, "Vegetable"],
        # Beverages
        ["Cà phê sữa đá", "Iced Coffee with Milk", 150, 3, 25, 5, 0, "Beverage"],
        ["Cà phê đen", "Black Coffee", 5, 0, 1, 0, 0, "Beverage"],
        ["Trà sữa", "Milk Tea", 250, 4, 45, 8, 0, "Beverage"],
        ["Nước mía", "Sugarcane Juice", 180, 0, 45, 0, 0, "Beverage"],
        ["Sinh tố bơ", "Avocado Smoothie", 280, 4, 35, 15, 6, "Beverage"],
        ["Sinh tố xoài", "Mango Smoothie", 200, 2, 48, 2, 3, "Beverage"],
        ["Nước dừa", "Coconut Water", 45, 1, 9, 0.5, 1, "Beverage"],
        ["Trà đá", "Iced Tea", 2, 0, 0.5, 0, 0, "Beverage"],
        # Desserts
        ["Chè ba màu", "Three Color Dessert", 280, 4, 55, 6, 3, "Dessert"],
        ["Chè đậu xanh", "Mung Bean Dessert", 220, 6, 42, 4, 4, "Dessert"],
        ["Chè bưởi", "Pomelo Dessert", 180, 2, 40, 3, 2, "Dessert"],
        ["Bánh flan", "Flan", 200, 5, 30, 7, 0, "Dessert"],
        ["Sương sa hột lựu", "Tapioca Dessert", 150, 1, 35, 2, 1, "Dessert"],
        # Snacks
        ["Bánh tráng nướng", "Grilled Rice Paper", 180, 4, 32, 4, 1, "Snack"],
        ["Bánh đa", "Rice Cracker", 120, 2, 25, 2, 1, "Snack"],
        ["Khoai lang luộc", "Boiled Sweet Potato", 90, 2, 21, 0.2, 3, "Snack"],
        ["Bắp luộc", "Boiled Corn", 110, 3, 25, 1.5, 3, "Snack"],
    ]

    # Keep the historical cwd-relative location as the default so existing
    # callers that pass no argument write to the same place as before.
    if output_dir is None:
        output_dir = Path("data_mining/datasets")
    else:
        output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    csv_path = output_dir / "vietnamese_food_nutrition.csv"

    # newline='' lets the csv module control line endings itself; UTF-8 is
    # required for the Vietnamese names.
    with open(csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(columns)
        writer.writerows(foods)

    print("✅ Created Vietnamese Food Database")
    print(f" File: {csv_path}")
    print(f" Foods: {len(foods)}")
    print(f" Size: {csv_path.stat().st_size / 1024:.1f} KB")

    # Summary by category, largest first. most_common() keeps first-seen
    # order among categories with equal counts.
    categories = Counter(food[category_index] for food in foods)
    print("\n📊 Breakdown by category:")
    for cat, count in categories.most_common():
        print(f" {cat}: {count} foods")

    return csv_path
if __name__ == "__main__":
    # Script entry point: build the CSV, then report the outcome through the
    # process exit status (0 on success, 1 on any error).
    try:
        vn_food_db()
    except Exception as err:
        print(f"❌ Error: {err}")
        # Imported lazily: only needed on the failure path.
        import traceback
        traceback.print_exc()
        sys.exit(1)
    else:
        sys.exit(0)