Spaces:

keras-io
/

collaborative-filtering-movielens

Runtime error

App Files Files Community

mindwrapped committed on Jun 8, 2022

Commit

cfb8b3f

1 Parent(s): 7f94470

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -25

app.py CHANGED Viewed

@@ -9,13 +9,8 @@ import gradio as gr
 from huggingface_hub import from_pretrained_keras
 # Download the actual data from http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
-# Use the ratings.csv file
-movielens_data_file_url = (
-    "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
-)
-movielens_zipped_file = keras.utils.get_file(
-    "ml-latest-small.zip", movielens_data_file_url, extract=False
-)
 keras_datasets_path = Path(movielens_zipped_file).parents[0]
 movielens_dir = keras_datasets_path / "ml-latest-small"
@@ -27,25 +22,29 @@ if not movielens_dir.exists():
         zip.extractall(path=keras_datasets_path)
         print("Done!")
 ratings_file = movielens_dir / "ratings.csv"
 df = pd.read_csv(ratings_file)
-# Make all the encodings
 user_ids = df["userId"].unique().tolist()
 user2user_encoded = {x: i for i, x in enumerate(user_ids)}
-userencoded2user = {i: x for i, x in enumerate(user_ids)}
 movie_ids = df["movieId"].unique().tolist()
 movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
 movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
-df["user"] = df["userId"].map(user2user_encoded)
 df["movie"] = df["movieId"].map(movie2movie_encoded)
-num_users = len(user2user_encoded)
 num_movies = len(movie_encoded2movie)
 df["rating"] = df["rating"].values.astype(np.float32)
 # min and max ratings will be used to normalize the ratings later
-min_rating = min(df["rating"])
-max_rating = max(df["rating"])
 # Load model
 model = from_pretrained_keras('mindwrapped/collaborative-filtering-movielens')
@@ -53,14 +52,14 @@ movie_df = pd.read_csv(movielens_dir / "movies.csv")
 def update_user(id):
-  return get_top_rated_from_user(id), get_recommendations(id)
-def get_top_rated_from_user(id):
-  decoded_id = userencoded2user.get(id)
-  movies_watched_by_user = df[df.userId == decoded_id]
   # Get the top rated movies by this user
   top_movies_user = (
       movies_watched_by_user.sort_values(by="rating", ascending=False)
       .head(5)
@@ -76,10 +75,10 @@ def random_user():
 def get_recommendations(id):
-  decoded_id = userencoded2user.get(id)
-  movies_watched_by_user = df[df.userId == decoded_id]
   # Get the top 10 recommended movies for this user
   movies_not_watched = movie_df[
       ~movie_df["movieId"].isin(movies_watched_by_user.movieId.values)
   ]["movieId"]
@@ -88,12 +87,12 @@ def get_recommendations(id):
   )
   movies_not_watched = [[movie2movie_encoded.get(x)] for x in movies_not_watched]
-  # Encode user
-  user_encoder = id
   # Create data [[user_id, movie_id],...]
   user_movie_array = np.hstack(
-      ([[user_encoder]] * len(movies_not_watched), movies_not_watched)
   )
   # Predict ratings for movies not watched

 from huggingface_hub import from_pretrained_keras
 # Download the actual data from http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
+movielens_data_file_url = "http://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
+movielens_zipped_file = keras.utils.get_file("ml-latest-small.zip", movielens_data_file_url, extract=False)
 keras_datasets_path = Path(movielens_zipped_file).parents[0]
 movielens_dir = keras_datasets_path / "ml-latest-small"
         zip.extractall(path=keras_datasets_path)
         print("Done!")
+# Get the ratings file
 ratings_file = movielens_dir / "ratings.csv"
 df = pd.read_csv(ratings_file)
+# Make the encodings for users
 user_ids = df["userId"].unique().tolist()
 user2user_encoded = {x: i for i, x in enumerate(user_ids)}
+user_encoded2user = {i: x for i, x in enumerate(user_ids)}
+df["user"] = df["userId"].map(user2user_encoded)
+num_users = len(user2user_encoded)
+# Make the encodings for movies
 movie_ids = df["movieId"].unique().tolist()
 movie2movie_encoded = {x: i for i, x in enumerate(movie_ids)}
 movie_encoded2movie = {i: x for i, x in enumerate(movie_ids)}
 df["movie"] = df["movieId"].map(movie2movie_encoded)
 num_movies = len(movie_encoded2movie)
+# Set ratings type
 df["rating"] = df["rating"].values.astype(np.float32)
 # min and max ratings will be used to normalize the ratings later
+# min_rating = min(df["rating"])
+# max_rating = max(df["rating"])
 # Load model
 model = from_pretrained_keras('mindwrapped/collaborative-filtering-movielens')
 def update_user(id):
+  return get_top_rated_movies_from_user(id), get_recommendations(id)
+def get_top_rated_movies_from_user(id):
+  decoded_id = user_encoded2user.get(id)
   # Get the top rated movies by this user
+  movies_watched_by_user = df[df.userId == decoded_id]
   top_movies_user = (
       movies_watched_by_user.sort_values(by="rating", ascending=False)
       .head(5)
 def get_recommendations(id):
+  decoded_id = user_encoded2user.get(id)
   # Get the top 10 recommended movies for this user
+  movies_watched_by_user = df[df.userId == decoded_id]
   movies_not_watched = movie_df[
       ~movie_df["movieId"].isin(movies_watched_by_user.movieId.values)
   ]["movieId"]
   )
   movies_not_watched = [[movie2movie_encoded.get(x)] for x in movies_not_watched]
+  # Encoded user id
+  encoded_id = id
   # Create data [[user_id, movie_id],...]
   user_movie_array = np.hstack(
+      ([[encoded_id]] * len(movies_not_watched), movies_not_watched)
   )
   # Predict ratings for movies not watched