Update app.py
app.py CHANGED

@@ -300,8 +300,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
             token_scores,
         ) = ([], [], [], [], [], [])
 
-
-    for beam_ix in range(len(beam_trees)):
+    for beam_ix in range(len(beam_trees)): # not range(n_beams) since there might be more ongoing trees.
         current_beam = beam_trees[beam_ix]
 
         # skip if the beam is already final
@@ -310,18 +309,17 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
 
         # Get top cumulative scores for the current beam
         current_top_token_indexes = list(
-            np.array(scores[step][score_idx].argsort()[-n_beams:])[::-1]
+            np.array(scores[step][beam_ix].argsort()[-n_beams:])[::-1]
         )
         top_token_indexes += current_top_token_indexes
-        token_scores += list(np.array(scores[step][score_idx][current_top_token_indexes]))
+        token_scores += list(np.array(scores[step][beam_ix][current_top_token_indexes]))
         top_cumulative_scores += list(
-            np.array(scores[step][score_idx][current_top_token_indexes])
+            np.array(scores[step][beam_ix][current_top_token_indexes])
             + current_beam.cumulative_score
         )
         beam_indexes += [beam_ix] * n_beams
         current_sequence += [beam_trees[beam_ix].current_sequence] * n_beams
         top_tokens += [tokenizer.decode([el]) for el in current_top_token_indexes]
-        score_idx += 1
 
         top_df = pd.DataFrame.from_dict(
             {
@@ -358,7 +356,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
             break
         top_df_selected_filtered = top_df_selected.iloc[:beams_to_keep]
 
-        # Write the scores table
+        # Write the scores table in each beam tree
         score_idx = 0
         for beam_ix in range(len(beam_trees)):
            current_beam = beam_trees[beam_ix]
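The substantive change in this commit: inside the expansion loop, scores[step] is now indexed by the loop variable beam_ix instead of a separately incremented score_idx counter (the score_idx += 1 update is removed), so each ongoing beam reads its own row of step scores, and the loop runs over len(beam_trees) rather than n_beams since there may be more ongoing trees than beams. Below is a minimal sketch of the per-beam top-k expansion this loop performs. It is an illustration, not code from app.py: the names expand_beams, step_scores, and cumulative_scores are hypothetical, and it assumes scores[step] behaves like a (num_beams, vocab_size) NumPy array of per-token log-probabilities.

    import numpy as np

    def expand_beams(step_scores: np.ndarray, cumulative_scores: np.ndarray, n_beams: int):
        """Return the n_beams best (beam_ix, token_id, cumulative_score) continuations."""
        candidates = []
        for beam_ix in range(step_scores.shape[0]):  # one score row per ongoing beam
            # argsort is ascending, so the last n_beams entries, reversed, are the
            # top-scoring token ids for this beam, best first.
            top_ids = step_scores[beam_ix].argsort()[-n_beams:][::-1]
            for token_id in top_ids:
                candidates.append((
                    beam_ix,
                    int(token_id),
                    float(step_scores[beam_ix, token_id] + cumulative_scores[beam_ix]),
                ))
        # Keep the overall n_beams best continuations across all beams.
        candidates.sort(key=lambda c: c[2], reverse=True)
        return candidates[:n_beams]

    # Toy example: 2 beams over a 5-token vocabulary, log-probability scores.
    rng = np.random.default_rng(0)
    step_scores = np.log(rng.dirichlet(np.ones(5), size=2))  # shape (2, 5)
    print(expand_beams(step_scores, cumulative_scores=np.zeros(2), n_beams=2))

Collecting every beam's top candidates with their cumulative scores and then keeping only the best few appears to mirror what the diff does with top_df and top_df_selected_filtered: gather all candidate rows first, then filter down to beams_to_keep.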