Update app.py
app.py CHANGED

@@ -300,8 +300,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
             token_scores,
         ) = ([], [], [], [], [], [])
 
-
-    for beam_ix in range(len(beam_trees)):
+    for beam_ix in range(len(beam_trees)): # not range(n_beams) since there might be more ongoing trees.
         current_beam = beam_trees[beam_ix]
 
         # skip if the beam is already final
@@ -310,18 +309,17 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
 
         # Get top cumulative scores for the current beam
         current_top_token_indexes = list(
-            np.array(scores[step][score_idx].argsort()[-n_beams:])[::-1]
+            np.array(scores[step][beam_ix].argsort()[-n_beams:])[::-1]
         )
         top_token_indexes += current_top_token_indexes
-        token_scores += list(np.array(scores[step][score_idx][current_top_token_indexes]))
+        token_scores += list(np.array(scores[step][beam_ix][current_top_token_indexes]))
         top_cumulative_scores += list(
-            np.array(scores[step][score_idx][current_top_token_indexes])
+            np.array(scores[step][beam_ix][current_top_token_indexes])
             + current_beam.cumulative_score
         )
         beam_indexes += [beam_ix] * n_beams
         current_sequence += [beam_trees[beam_ix].current_sequence] * n_beams
         top_tokens += [tokenizer.decode([el]) for el in current_top_token_indexes]
-        score_idx += 1
 
         top_df = pd.DataFrame.from_dict(
             {
@@ -358,7 +356,7 @@ def generate_beams(start_sentence, scores, length_penalty, decoded_sequences, be
             break
         top_df_selected_filtered = top_df_selected.iloc[:beams_to_keep]
 
-        # Write the scores table
+        # Write the scores table in each beam tree
         score_idx = 0
         for beam_ix in range(len(beam_trees)):
            current_beam = beam_trees[beam_ix]
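The substantive change in this commit: inside the expansion loop, scores[step] is now indexed by the loop variable beam_ix instead of a separately incremented score_idx counter (the score_idx += 1 update is removed), so each ongoing beam reads its own row of step scores, and the loop runs over len(beam_trees) rather than n_beams since there may be more ongoing trees than beams. Below is a minimal sketch of the per-beam top-k expansion this loop performs. It is an illustration, not code from app.py: the names expand_beams, step_scores, and cumulative_scores are hypothetical, and it assumes scores[step] behaves like a (num_beams, vocab_size) NumPy array of per-token log-probabilities.

    import numpy as np

    def expand_beams(step_scores: np.ndarray, cumulative_scores: np.ndarray, n_beams: int):
        """Return the n_beams best (beam_ix, token_id, cumulative_score) continuations."""
        candidates = []
        for beam_ix in range(step_scores.shape[0]):  # one score row per ongoing beam
            # argsort is ascending, so the last n_beams entries, reversed, are the
            # top-scoring token ids for this beam, best first.
            top_ids = step_scores[beam_ix].argsort()[-n_beams:][::-1]
            for token_id in top_ids:
                candidates.append((
                    beam_ix,
                    int(token_id),
                    float(step_scores[beam_ix, token_id] + cumulative_scores[beam_ix]),
                ))
        # Keep the overall n_beams best continuations across all beams.
        candidates.sort(key=lambda c: c[2], reverse=True)
        return candidates[:n_beams]

    # Toy example: 2 beams over a 5-token vocabulary, log-probability scores.
    rng = np.random.default_rng(0)
    step_scores = np.log(rng.dirichlet(np.ones(5), size=2))  # shape (2, 5)
    print(expand_beams(step_scores, cumulative_scores=np.zeros(2), n_beams=2))

Collecting every beam's top candidates with their cumulative scores and then keeping only the best few appears to mirror what the diff does with top_df and top_df_selected_filtered: gather all candidate rows first, then filter down to beams_to_keep.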