Spaces:

eubinecto
/

idiomify

Runtime error

App Files Files Community

eubinecto committed on Mar 7, 2022

Commit

59df933

1 Parent(s): 72190fb

[#9] main_deploy.py: removed authors & versions. config.yaml: increased epochs with smaller lr. fetchers.py: typo fixed

Browse files

Files changed (6) hide show

config.yaml +2 -2
idiomify/fetchers.py +5 -5
idiomify/models.py +0 -1
idiomify/paths.py +0 -1
idiomify/preprocess.py +0 -1
main_deploy.py +2 -6

config.yaml CHANGED Viewed

@@ -3,11 +3,11 @@ idiomifier:
   ver: m-1-3
   desc: Just overfitting on PIE dataset, but now with <idiom> & </idiom> special tokens.
   bart: facebook/bart-base
-  lr: 0.0001
   literal2idiomatic_ver: d-1-3
   idioms_ver: d-1-3
   tokenizer_ver: t-1-1
-  max_epochs: 3
   batch_size: 40
   shuffle: true
   seed: 104

   ver: m-1-3
   desc: Just overfitting on PIE dataset, but now with <idiom> & </idiom> special tokens.
   bart: facebook/bart-base
+  lr: 0.00005
   literal2idiomatic_ver: d-1-3
   idioms_ver: d-1-3
   tokenizer_ver: t-1-1
+  max_epochs: 8
   batch_size: 40
   shuffle: true
   seed: 104

idiomify/fetchers.py CHANGED Viewed

@@ -27,7 +27,7 @@ def fetch_idioms(ver: str, run: Run = None) -> pd.DataFrame:
         artifact = run.use_artifact(f"idioms:{ver}", type="dataset")
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/idioms:{ver}", type="dataset")
-    artifact_dir = artifact.download(root=idioms_dir(ver))
     tsv_path = path.join(artifact_dir, "all.tsv")
     return pd.read_csv(tsv_path, sep="\t")
@@ -39,7 +39,7 @@ def fetch_literal2idiomatic(ver: str, run: Run = None) -> Tuple[pd.DataFrame, pd
         artifact = run.use_artifact(f"literal2idiomatic:{ver}", type="dataset")
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/literal2idiomatic:{ver}", type="dataset")
-    artifact_dir = artifact.download(root=literal2idiomatic(ver))
     train_path = path.join(artifact_dir, "train.tsv")
     test_path = path.join(artifact_dir, "test.tsv")
     train_df = pd.read_csv(train_path, sep="\t")
@@ -57,10 +57,10 @@ def fetch_idiomifier(ver: str, run: Run = None) -> Idiomifier:
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/idiomifier:{ver}", type="model")
     config = artifact.metadata
-    artifact_dir = artifact.download(root=idiomifier_dir(ver))
     ckpt_path = path.join(artifact_dir, "model.ckpt")
     bart = AutoModelForSeq2SeqLM.from_config(AutoConfig.from_pretrained(config['bart']))
-    bart.resize_embeddings(config['vocab_size'])
     model = Idiomifier.load_from_checkpoint(ckpt_path, bart=bart)
     return model
@@ -70,7 +70,7 @@ def fetch_tokenizer(ver: str, run: Run = None) -> BartTokenizer:
         artifact = run.use_artifact(f"tokenizer:{ver}", type="other")
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/tokenizer:{ver}", type="other")
-    artifact_dir = artifact.download(root=tokenizer_dir(ver))
     tokenizer = BartTokenizer.from_pretrained(artifact_dir)
     return tokenizer

         artifact = run.use_artifact(f"idioms:{ver}", type="dataset")
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/idioms:{ver}", type="dataset")
+    artifact_dir = artifact.download(root=str(idioms_dir(ver)))
     tsv_path = path.join(artifact_dir, "all.tsv")
     return pd.read_csv(tsv_path, sep="\t")
         artifact = run.use_artifact(f"literal2idiomatic:{ver}", type="dataset")
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/literal2idiomatic:{ver}", type="dataset")
+    artifact_dir = artifact.download(root=str(literal2idiomatic(ver)))
     train_path = path.join(artifact_dir, "train.tsv")
     test_path = path.join(artifact_dir, "test.tsv")
     train_df = pd.read_csv(train_path, sep="\t")
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/idiomifier:{ver}", type="model")
     config = artifact.metadata
+    artifact_dir = artifact.download(root=str(idiomifier_dir(ver)))
     ckpt_path = path.join(artifact_dir, "model.ckpt")
     bart = AutoModelForSeq2SeqLM.from_config(AutoConfig.from_pretrained(config['bart']))
+    bart.resize_token_embeddings(config['vocab_size'])
     model = Idiomifier.load_from_checkpoint(ckpt_path, bart=bart)
     return model
         artifact = run.use_artifact(f"tokenizer:{ver}", type="other")
     else:
         artifact = wandb.Api().artifact(f"eubinecto/idiomify/tokenizer:{ver}", type="other")
+    artifact_dir = artifact.download(root=str(tokenizer_dir(ver)))
     tokenizer = BartTokenizer.from_pretrained(artifact_dir)
     return tokenizer

idiomify/models.py CHANGED Viewed

@@ -71,4 +71,3 @@ class Idiomifier(pl.LightningModule):  # noqa
         """
         # The authors used Adam, so we might as well use it as well.
         return torch.optim.AdamW(self.parameters(), lr=self.hparams['lr'])

         """
         # The authors used Adam, so we might as well use it as well.
         return torch.optim.AdamW(self.parameters(), lr=self.hparams['lr'])

idiomify/paths.py CHANGED Viewed

@@ -19,4 +19,3 @@ def idiomifier_dir(ver: str) -> Path:
 def tokenizer_dir(ver: str) -> Path:
     return ARTIFACTS_DIR / f"tokenizer_{ver}"


 def tokenizer_dir(ver: str) -> Path:
     return ARTIFACTS_DIR / f"tokenizer_{ver}"

idiomify/preprocess.py CHANGED Viewed

@@ -59,4 +59,3 @@ def stratified_split(df: pd.DataFrame, ratio: float, seed: int) -> Tuple[pd.Data
                                           test_size=other_size, random_state=seed,
                                           shuffle=True)
     return ratio_df, other_df

                                           test_size=other_size, random_state=seed,
                                           shuffle=True)
     return ratio_df, other_df

main_deploy.py CHANGED Viewed

@@ -1,20 +1,18 @@
 """
 we deploy the pipeline via streamlit.
 """
-from typing import Tuple, List
 import streamlit as st
 from transformers import BartTokenizer
 from idiomify.fetchers import fetch_config, fetch_idiomifier, fetch_idioms
 from idiomify.pipeline import Pipeline
-from idiomify.models import Idiomifier
 @st.cache(allow_output_mutation=True)
-def fetch_resources() -> Tuple[dict, Idiomifier, BartTokenizer, List[str]]:
     config = fetch_config()['idiomifier']
     model = fetch_idiomifier(config['ver'])
-    idioms = fetch_idioms(config['idioms_ver'])
     tokenizer = BartTokenizer.from_pretrained(config['bart'])
     return config, model, tokenizer, idioms
@@ -24,8 +22,6 @@ def main():
     model.eval()
     pipeline = Pipeline(model, tokenizer)
     st.title("Idiomify Demo")
-    st.markdown(f"Author: `Eu-Bin KIM`")
-    st.markdown(f"Version: `{config['ver']}`")
     text = st.text_area("Type sentences here",
                         value="Just remember there will always be a hope even when things look black")
     with st.sidebar:

 """
 we deploy the pipeline via streamlit.
 """
 import streamlit as st
 from transformers import BartTokenizer
 from idiomify.fetchers import fetch_config, fetch_idiomifier, fetch_idioms
 from idiomify.pipeline import Pipeline
 @st.cache(allow_output_mutation=True)
+def fetch_resources() -> tuple:
     config = fetch_config()['idiomifier']
     model = fetch_idiomifier(config['ver'])
     tokenizer = BartTokenizer.from_pretrained(config['bart'])
+    idioms = fetch_idioms(config['idioms_ver'])
     return config, model, tokenizer, idioms
     model.eval()
     pipeline = Pipeline(model, tokenizer)
     st.title("Idiomify Demo")
     text = st.text_area("Type sentences here",
                         value="Just remember there will always be a hope even when things look black")
     with st.sidebar: