wenbemi committed on
Commit
a225344
·
verified ·
1 Parent(s): 552e483

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -1,10 +1,9 @@
1
  # streamlit 라이브러리가 import 되기 전에,
2
  # 설정 파일 경로를 앱 내부의 쓰기 가능한 경로로 강제 지정합니다.
3
- import os, pathlib
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import json
7
- import os
8
  import random
9
 
10
  APP_DIR = pathlib.Path(__file__).parent.resolve()
@@ -35,17 +34,21 @@ def _read_csv_bytes(b: bytes) -> pd.DataFrame:
35
  except UnicodeDecodeError:
36
  return pd.read_csv(io.BytesIO(b), encoding="cp949")
37
 
38
- def load_csv_smart(local_path: str, hub_filename: str,
39
- repo_id: str = HF_DATASET_REPO, repo_type: str = "dataset",
40
- revision: str = HF_DATASET_REV) -> pd.DataFrame:
41
- # 1) 로컬 우선
 
 
 
42
  if os.path.exists(local_path):
43
  with open(local_path, "rb") as f:
44
  data = f.read()
45
  if not _is_pointer_bytes(data):
46
- return _read_csv_bytes(data)
47
- # 포인터면 허브로 폴백
48
- # 2) 허브 다운로드
 
49
  cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
50
  repo_type=repo_type, revision=revision)
51
  try:
 
1
  # streamlit 라이브러리가 import 되기 전에,
2
  # 설정 파일 경로를 앱 내부의 쓰기 가능한 경로로 강제 지정합니다.
3
+ import os, pathlib, io
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import json
 
7
  import random
8
 
9
  APP_DIR = pathlib.Path(__file__).parent.resolve()
 
34
  except UnicodeDecodeError:
35
  return pd.read_csv(io.BytesIO(b), encoding="cp949")
36
 
37
+ def load_csv_smart(local_path: str,
38
+ hub_filename: str | None = None,
39
+ repo_id: str = HF_DATASET_REPO,
40
+ repo_type: str = "dataset",
41
+ revision: str = HF_DATASET_REV):
42
+ if hub_filename is None:
43
+ hub_filename = os.path.basename(local_path)
44
  if os.path.exists(local_path):
45
  with open(local_path, "rb") as f:
46
  data = f.read()
47
  if not _is_pointer_bytes(data):
48
+ try:
49
+ return pd.read_csv(io.BytesIO(data), encoding="utf-8")
50
+ except UnicodeDecodeError:
51
+ return pd.read_csv(io.BytesIO(data), encoding="cp949")
52
  cached = hf_hub_download(repo_id=repo_id, filename=hub_filename,
53
  repo_type=repo_type, revision=revision)
54
  try: