Update digestor.py
digestor.py  CHANGED  +2 -2
@@ -159,10 +159,10 @@ class Digestor:
         # Finally, chunk the piece, adjusting the chunks if too long.
         for i, j in range_list:
             if (tokenized_len := len(tokenizer(chunk := ' '.join(fractured[i:j])))) <= self.token_limit: # d[i:j]).replace('\n',' ')))) <= self.token_limit:
-                chunk_list.append(chunk
+                chunk_list.append(chunk)
             else: # if chunks of <limit> words are too long, back them off.
                 chunk_list.append(' '.join(chunk.split(' ')[: self.token_limit - tokenized_len ])) # tokenized_len ]).replace('\n',' '))
-
+        chunk_list = [i.replace(' . ','. ') for i in chunk_list]
         return chunk_list
 
 
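The back-off in the else branch works through a negative slice: when tokenized_len exceeds self.token_limit, the bound self.token_limit - tokenized_len is negative, so the slice keeps all but that many trailing words of the chunk. A minimal standalone sketch of that behaviour, using a made-up whitespace tokenizer and token limit in place of Digestor's real tokenizer and self.token_limit:

# Minimal sketch of the back-off in the else branch above.
# The whitespace tokenizer and token_limit here are stand-ins chosen
# only to make the example runnable, not Digestor's actual ones.

def tokenizer(text):
    return text.split()

token_limit = 8
chunk = "one two three four five six seven eight nine ten"

tokenized_len = len(tokenizer(chunk))  # 10 tokens with this toy tokenizer
if tokenized_len > token_limit:
    # token_limit - tokenized_len == -2, so the slice keeps everything
    # except the last two words, backing the chunk off toward the limit.
    chunk = ' '.join(chunk.split(' ')[: token_limit - tokenized_len])

print(chunk)  # one two three four five six seven eight

Because the toy tokenizer and the whitespace split agree here, the trimmed chunk lands exactly on the limit; in general the slice trims one word per excess token, which is the rough back-off the original comment describes.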