Spaces:
Runtime error
Runtime error
import gradio as gr
from PyPDF4 import PdfFileReader
import tiktoken
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of the PDF at *file_path*.

    Args:
        file_path: Path of the PDF file to read; it is opened in binary mode.

    Returns:
        One string made of each page's ``extractText()`` output, in page
        order, with no separator inserted between pages.
    """
    with open(file_path, "rb") as file:
        pdf = PdfFileReader(file)
        # Pages are extracted while the file is still open. Joining once
        # avoids re-copying the accumulated string for every page.
        return "".join(
            pdf.getPage(page_num).extractText()
            for page_num in range(pdf.getNumPages())
        )
def count_tokens(text):
    """Return how many tokens *text* encodes to for the gpt-3.5-turbo model.

    ``disallowed_special=()`` is passed to ``encode``; per the tiktoken
    documentation this makes text that looks like a special token get
    encoded as ordinary text instead of raising an error.
    """
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    token_ids = encoding.encode(text, disallowed_special=())
    return len(token_ids)
def count_tokens_in_file(file):
    """Return the token count of the text extracted from an uploaded PDF.

    Args:
        file: The value supplied by the upload input: an object whose
            ``name`` attribute is the file's path, a plain path, or ``None``
            (e.g. the button was clicked with no file selected).

    Returns:
        The number of tokens in the PDF's text, or 0 when *file* is ``None``.
    """
    # Nothing to read: report zero tokens instead of failing on ``None.name``.
    if file is None:
        return 0
    # Accept both upload objects exposing ``.name`` and bare path values.
    pdf_path = getattr(file, "name", file)
    # Extract text from the PDF file
    paper_text = extract_text_from_pdf(pdf_path)
    return count_tokens(paper_text)
# Single-tab UI: upload a PDF, click the button, and show its token count.
with gr.Blocks() as demo:
    gr.Markdown("Upload your document to count their tokens")
    # The tab label names PDF only, matching the ".pdf" upload filter below
    # (the extraction helper reads PDFs exclusively).
    with gr.Tab("Upload PDF"):
        docs_input = gr.File(file_count="single", file_types=[".pdf"])
        tb_tokenCount = gr.Textbox(label='Number of tokens')
        btn_count = gr.Button("Count token")
        # Clicking sends the uploaded file to count_tokens_in_file and writes
        # the returned count into the textbox.
        btn_count.click(
            count_tokens_in_file,
            inputs=[docs_input],
            outputs=[tb_tokenCount],
        )