Spaces:
Runtime error
Runtime error
added article="This interface is based on [BookNLP](https://github.com/booknlp/booknlp)."
4f6be96
verified
| import os | |
| import gradio as gr | |
| import subprocess | |
| # Define function to convert ebook to txt using Calibre | |
def convert_to_txt(input_file):
    """Return the path of a plain-text version of ``input_file``.

    The text file sits next to the input and shares its base name with a
    ``.txt`` extension. Calibre's ``ebook-convert`` command is run only when
    that file does not exist yet, so an earlier conversion is reused as-is.

    Args:
        input_file: Path of the ebook to convert.

    Returns:
        Path of the ``.txt`` file.

    Raises:
        subprocess.CalledProcessError: If ``ebook-convert`` exits with a
            non-zero status (``check=True``).
    """
    stem, _extension = os.path.splitext(input_file)
    txt_path = stem + ".txt"

    # Nothing to do when the text version is already on disk.
    if os.path.exists(txt_path):
        return txt_path

    subprocess.run(["ebook-convert", input_file, txt_path], check=True)
    return txt_path
| # Define function to process file | |
def process_book(file):
    """Run BookNLP on an uploaded book and return a zip of its output files.

    Steps: make sure the spaCy model ``en_core_web_sm`` is available, build a
    BookNLP pipeline, convert the upload to ``.txt`` via ``convert_to_txt``
    when it is not already a text file, recreate the BookNLP output
    directory from scratch, process the book, and zip the results.

    Args:
        file: The upload handed over by Gradio: either a path (``str`` /
            ``os.PathLike``) or an object exposing the file path as ``.name``.

    Returns:
        Path of the zip archive containing the BookNLP output. The archive is
        written to ``output_dir/<book_id>_output.zip``, i.e. next to (not
        inside) the directory it archives.

    Raises:
        ValueError: If no file was supplied.
        subprocess.CalledProcessError: If the Calibre conversion fails.
    """
    # Fail fast, before loading any heavy libraries or models.
    if file is None:
        raise ValueError("No file was uploaded.")

    # Accept both a plain path and a file-like wrapper carrying the path in
    # ``.name``. Path-likes are checked first because ``pathlib.Path.name``
    # is only the base name, not the full path.
    if isinstance(file, (str, os.PathLike)):
        input_file = os.fspath(file)
    else:
        input_file = file.name

    # Heavy imports are kept local so they are only paid for when a book is
    # actually processed.
    import shutil

    import spacy
    from booknlp.booknlp import BookNLP
    from spacy.cli import download

    # Imported for its side effect: this will download the BookNLP files
    # using the author's Hugging Face backup.
    import download_missing_booknlp_models  # noqa: F401

    output_root = "output_dir"
    output_dir = os.path.join(output_root, "booknlp_output")
    book_id = os.path.splitext(os.path.basename(input_file))[0]

    # Ensure the spaCy model is installed; spacy.load raises OSError when
    # the model package cannot be found.
    try:
        spacy.load("en_core_web_sm")
    except OSError:
        download("en_core_web_sm")

    model_params = {
        "pipeline": "entity,quote,supersense,event,coref",
        "model": "big",
    }
    booknlp = BookNLP("en", model_params)

    # Anything that is not already plain text goes through Calibre first.
    # The check is case-insensitive so ``.TXT`` uploads are used directly.
    if not input_file.lower().endswith(".txt"):
        input_file = convert_to_txt(input_file)

    # Start from an empty output directory so results of a previous run
    # never leak into this book's archive.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    booknlp.process(input_file, output_dir, book_id)

    # Write the archive OUTSIDE the directory being zipped: creating it
    # inside would let the half-written zip be picked up as one of its own
    # members.
    archive_base = os.path.join(output_root, f"{book_id}_output")
    return shutil.make_archive(archive_base, "zip", output_dir)
| # Gradio Interface | |
def gradio_interface():
    """Build the Gradio UI around ``process_book`` and launch it.

    The interface takes a single ebook upload (restricted to the extensions
    listed below) and offers a single file for download.
    """
    # Extensions accepted by the upload widget.
    supported_formats = [
        '.azw', '.azw3', '.azw4', '.cbz', '.cbr', '.cb7', '.cbc', '.chm',
        '.djvu', '.docx', '.epub', '.fb2', '.fbz', '.html', '.htmlz', '.lit',
        '.lrf', '.mobi', '.odt', '.pdf', '.prc', '.pdb', '.pml', '.rb',
        '.rtf', '.snb', '.tcr', '.txt', '.txtz',
    ]

    upload_widget = gr.File(
        file_types=supported_formats,
        label="Upload an ebook file (.azw, .epub, .pdf, .txt, etc.)",
    )
    download_widget = gr.File(label="Download the output files")

    # The description shown to the user spells out every supported format.
    format_list = ', '.join(supported_formats)
    description = (
        f"Upload any of the supported formats: {format_list}. "
        "If a .txt file is uploaded, it will directly be processed by BookNLP. "
        "Otherwise, it will be converted to .txt using Calibre first."
    )

    demo = gr.Interface(
        fn=process_book,
        inputs=upload_widget,
        outputs=download_widget,
        title="BookNLP Processor with Ebook Support",
        description=description,
        article="This interface is based on [BookNLP](https://github.com/booknlp/booknlp).",
    )
    demo.launch()
# Script entry point: start the Gradio app only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    gradio_interface()