wenbemi commited on
Commit
61be846
·
verified ·
1 Parent(s): e443a5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -6
app.py CHANGED
@@ -1,6 +1,7 @@
1
  # streamlit 라이브러리가 import 되기 전에,
2
  # 설정 파일 경로를 앱 내부의 쓰기 가능한 경로로 강제 지정합니다.
3
  import os, pathlib
 
4
 
5
  APP_DIR = pathlib.Path(__file__).parent.resolve()
6
  os.environ.setdefault("HOME", str(APP_DIR)) # '~'๊ฐ€ /๊ฐ€ ์•„๋‹ˆ๋ผ /app์œผ๋กœ ๊ฐ€๋„๋ก
@@ -11,7 +12,43 @@ os.environ["STREAMLIT_HOME"] = str(CONFIG_DIR)
11
  os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
12
  os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false" # 선택: metrics 파일 생성 줄이기
13
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  import pandas as pd
16
  import json
17
  import os
@@ -65,12 +102,12 @@ def load_data(path):
65
  # st.error("TRIPDATA_URL ๋ฏธ์„ค์ •: Streamlit Secrets์— URL์„ ๋„ฃ์–ด์ฃผ์„ธ์š”.")
66
  # st.stop()
67
 
68
- travel_df = load_data("ํŠธ๋ฆฝ๋‹ท์ปด_๊ฐ์ •_ํ…Œ๋งˆ_ํ•œ์ค„์„ค๋ช…_ํ†ตํ•ฉ_07_08.csv")
69
- external_score_df = load_data("ํด๋Ÿฌ์Šคํ„ฐ_ํฌํ•จ_์™ธ๋ถ€์š”์ธ_์ข…ํ•ฉ์ ์ˆ˜_๊ฒฐ๊ณผ_์ตœ์ข….csv")
70
- festival_df = load_data("์ „์ฒ˜๋ฆฌ_ํ†ตํ•ฉ์ง€์—ญ์ถ•์ œ.csv")
71
- weather_df = load_data("์ „์ฒ˜๋ฆฌ_๋‚ ์”จ_ํ†ตํ•ฉ_07_08.csv")
72
- package_df = load_data("๋ชจ๋‘ํˆฌ์–ด_์ปฌ๋Ÿผ๋ณ„_๊ฐœ์ˆ˜_07_08.csv")
73
- master_df = load_data("๋‚˜๋ผ_๋„์‹œ_๋ฆฌ์ŠคํŠธ.csv")
74
  theme_title_phrases = load_json_data("theme_title_phrases.json")
75
 
76
  # travel_df๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ๋กœ๋“œ๋˜์—ˆ๋Š”์ง€ ์ตœ์ข… ํ™•์ธ
 
1
  # streamlit 라이브러리가 import 되기 전에,
2
  # 설정 파일 경로를 앱 내부의 쓰기 가능한 경로로 강제 지정합니다.
3
  import os, pathlib
4
+ from huggingface_hub import hf_hub_download
5
 
6
  APP_DIR = pathlib.Path(__file__).parent.resolve()
7
  os.environ.setdefault("HOME", str(APP_DIR)) # '~'๊ฐ€ /๊ฐ€ ์•„๋‹ˆ๋ผ /app์œผ๋กœ ๊ฐ€๋„๋ก
 
12
  os.environ["STREAMLIT_SERVER_HEADLESS"] = "true"
13
  os.environ["STREAMLIT_BROWSER_GATHER_USAGE_STATS"] = "false" # 선택: metrics 파일 생성 줄이기
14
 
15
+ HF_DATASET_REPO = os.getenv("HF_DATASET_REPO", "yourname/your-dataset") # ← 본인 리포
16
+ HF_DATASET_REV = os.getenv("HF_DATASET_REV", "main")
17
+
18
def _is_pointer_bytes(b: bytes) -> bool:
    """Return True when *b* looks like a git-lfs / xet pointer stub, not real data.

    A checkout that did not fetch large files leaves a small text stub in
    place of each one; parsing that stub as CSV would yield garbage, so
    callers use this check to decide whether to fetch the real file.

    Args:
        b: Raw bytes of the candidate file.

    Returns:
        True if the content matches a known pointer-text pattern, else False.
    """
    import re  # function-scope import keeps this helper self-contained

    # Pointer stubs are tiny text files (the git-lfs spec keeps them under
    # 1 KiB), so anything larger than the inspected window is real content.
    # This also stops a large CSV that merely mentions "git-lfs" near the top
    # from being misclassified.
    if len(b) > 2048:
        return False

    head = b.decode(errors="ignore").lower()
    # Detect both git-lfs and xet pointer text patterns.
    return (
        "version https://git-lfs.github.com/spec/v1" in head
        or "git-lfs" in head
        or "pointer size" in head
        # Match "xet" only as a standalone token (e.g. "Xet backed hash");
        # a bare substring test also fires on ordinary words such as "Exeter".
        or re.search(r"(?<![a-z0-9])xet(?![a-z0-9])", head) is not None
    )
27
 
28
def _read_csv_bytes(b: bytes) -> "pd.DataFrame":
    """Parse in-memory CSV bytes, trying UTF-8 first and then CP949.

    The return annotation is a string on purpose: this function is defined
    above the module's ``import pandas as pd`` line, and an unquoted
    ``pd.DataFrame`` would be evaluated at definition time and raise
    ``NameError``.

    Args:
        b: Raw CSV file content.

    Returns:
        The parsed DataFrame.

    Raises:
        UnicodeDecodeError: If the bytes are valid in neither encoding.
    """
    import io  # function-scope import: the module does not visibly import io

    try:
        return pd.read_csv(io.BytesIO(b), encoding="utf-8")
    except UnicodeDecodeError:
        # Fall back to CP949, the legacy Korean Windows encoding.
        return pd.read_csv(io.BytesIO(b), encoding="cp949")
33
+
34
def load_csv_smart(local_path: str, hub_filename: "str | None" = None,
                   repo_id: str = HF_DATASET_REPO, repo_type: str = "dataset",
                   revision: str = HF_DATASET_REV) -> "pd.DataFrame":
    """Load a CSV from disk, falling back to the Hugging Face Hub.

    The local file is used when it exists and holds real data. If it is
    missing, or is only a git-lfs / xet pointer stub, the file is downloaded
    from the Hub instead.

    Args:
        local_path: Path of the CSV on local disk.
        hub_filename: File name inside the Hub repo. Defaults to the base
            name of ``local_path``, so one-argument calls such as
            ``load_csv_smart("weather.csv")`` work. This assumes the repo
            stores the file under the same name; pass it explicitly if not.
        repo_id: Hub repository id.
        repo_type: Hub repository type.
        revision: Branch, tag or commit to download from.

    Returns:
        The parsed DataFrame.
    """
    if hub_filename is None:
        hub_filename = os.path.basename(local_path)

    # 1) Prefer the local copy.
    if os.path.exists(local_path):
        with open(local_path, "rb") as f:
            data = f.read()
        if not _is_pointer_bytes(data):
            return _read_csv_bytes(data)
        # The local file is only a pointer stub: fall through to the Hub.

    # 2) Download from the Hub (hf_hub_download returns the cached file path).
    cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
                             repo_type=repo_type, revision=revision)
    try:
        return pd.read_csv(cached, encoding="utf-8")
    except UnicodeDecodeError:
        # Fall back to CP949, the legacy Korean Windows encoding.
        return pd.read_csv(cached, encoding="cp949")
51
+
52
  import pandas as pd
53
  import json
54
  import os
 
102
  # st.error("TRIPDATA_URL ๋ฏธ์„ค์ •: Streamlit Secrets์— URL์„ ๋„ฃ์–ด์ฃผ์„ธ์š”.")
103
  # st.stop()
104
 
105
+ travel_df = load_csv_smart("trip_emotions.csv")
106
+ external_score_df = load_csv_smart("external_scores.csv")
107
+ festival_df = load_csv_smart("festivals.csv")
108
+ weather_df = load_csv_smart("weather.csv")
109
+ package_df = load_csv_smart("packages.csv")
110
+ master_df = load_csv_smart("countries_cities.csv")
111
  theme_title_phrases = load_json_data("theme_title_phrases.json")
112
 
113
  # travel_df๊ฐ€ ์„ฑ๊ณต์ ์œผ๋กœ ๋กœ๋“œ๋˜์—ˆ๋Š”์ง€ ์ตœ์ข… ํ™•์ธ