Spaces:
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -13,14 +13,14 @@ import os
|
|
| 13 |
# Initialize FastAPI app
|
| 14 |
app = FastAPI()
|
| 15 |
|
| 16 |
-
# Set the correct path for static files
|
| 17 |
-
STATIC_DIR =
|
| 18 |
|
| 19 |
# Ensure the static directory exists
|
| 20 |
if not os.path.exists(STATIC_DIR):
|
| 21 |
os.makedirs(STATIC_DIR)
|
| 22 |
|
| 23 |
-
app.mount("/", StaticFiles(directory=
|
| 24 |
|
| 25 |
@app.get("/", response_class=HTMLResponse)
|
| 26 |
async def read_root():
|
|
@@ -56,13 +56,11 @@ def load_translator(src_code: str, tgt_code: str):
|
|
| 56 |
|
| 57 |
if model_key in AVAILABLE_MODELS:
|
| 58 |
return pipeline("translation", model=AVAILABLE_MODELS[model_key])
|
| 59 |
-
|
| 60 |
elif src_code != "en" and tgt_code != "en":
|
| 61 |
return (
|
| 62 |
pipeline("translation", model=AVAILABLE_MODELS.get(f"{src_code}-en")),
|
| 63 |
pipeline("translation", model=AVAILABLE_MODELS.get(f"en-{tgt_code}"))
|
| 64 |
)
|
| 65 |
-
|
| 66 |
else:
|
| 67 |
raise ValueError(f"No model available for {src_code} -> {tgt_code}")
|
| 68 |
|
|
@@ -75,15 +73,12 @@ def extract_text(file: UploadFile):
|
|
| 75 |
try:
|
| 76 |
if file.filename.endswith(".txt"):
|
| 77 |
return file.file.read().decode("utf-8")
|
| 78 |
-
|
| 79 |
elif file.filename.endswith(".pdf"):
|
| 80 |
doc = fitz.open(stream=file.file.read(), filetype="pdf")
|
| 81 |
return "\n".join([page.get_text() for page in doc])
|
| 82 |
-
|
| 83 |
elif file.filename.endswith(".docx"):
|
| 84 |
doc = Document(file.file)
|
| 85 |
return "\n".join([para.text for para in doc.paragraphs])
|
| 86 |
-
|
| 87 |
elif file.filename.endswith(".xlsx"):
|
| 88 |
wb = openpyxl.load_workbook(file.file)
|
| 89 |
text = ""
|
|
@@ -92,7 +87,6 @@ def extract_text(file: UploadFile):
|
|
| 92 |
for row in ws.iter_rows():
|
| 93 |
text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
|
| 94 |
return text
|
| 95 |
-
|
| 96 |
elif file.filename.endswith(".pptx"):
|
| 97 |
prs = Presentation(file.file)
|
| 98 |
text = ""
|
|
@@ -101,10 +95,8 @@ def extract_text(file: UploadFile):
|
|
| 101 |
if hasattr(shape, "text"):
|
| 102 |
text += shape.text + "\n"
|
| 103 |
return text
|
| 104 |
-
|
| 105 |
else:
|
| 106 |
raise HTTPException(status_code=400, detail="File type not supported.")
|
| 107 |
-
|
| 108 |
except Exception as e:
|
| 109 |
raise HTTPException(status_code=500, detail=f"Error extracting text: {str(e)}")
|
| 110 |
|
|
@@ -126,19 +118,13 @@ async def upload_file(
|
|
| 126 |
raise HTTPException(status_code=400, detail=f"Unsupported language: {src_lang} -> {tgt_lang}")
|
| 127 |
|
| 128 |
try:
|
| 129 |
-
# Load translation model
|
| 130 |
translator = load_translator(src_code, tgt_code)
|
| 131 |
-
|
| 132 |
-
# If indirect translation via English
|
| 133 |
if isinstance(translator, tuple):
|
| 134 |
translator1, translator2 = translator
|
| 135 |
intermediate_text = "\n".join([translator1(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
|
| 136 |
translated_text = "\n".join([translator2(chunk)[0]['translation_text'] for chunk in chunk_text(intermediate_text)])
|
| 137 |
-
|
| 138 |
else:
|
| 139 |
translated_text = "\n".join([translator(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
|
| 140 |
-
|
| 141 |
return {"translated_text": translated_text}
|
| 142 |
-
|
| 143 |
except Exception as e:
|
| 144 |
raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
|
|
|
|
| 13 |
# Initialize FastAPI app
|
| 14 |
app = FastAPI()
|
| 15 |
|
| 16 |
+
# Set the correct path for static files (for Hugging Face Spaces)
|
| 17 |
+
STATIC_DIR = "static"
|
| 18 |
|
| 19 |
# Ensure the static directory exists
|
| 20 |
if not os.path.exists(STATIC_DIR):
|
| 21 |
os.makedirs(STATIC_DIR)
|
| 22 |
|
| 23 |
+
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 24 |
|
| 25 |
@app.get("/", response_class=HTMLResponse)
|
| 26 |
async def read_root():
|
|
|
|
| 56 |
|
| 57 |
if model_key in AVAILABLE_MODELS:
|
| 58 |
return pipeline("translation", model=AVAILABLE_MODELS[model_key])
|
|
|
|
| 59 |
elif src_code != "en" and tgt_code != "en":
|
| 60 |
return (
|
| 61 |
pipeline("translation", model=AVAILABLE_MODELS.get(f"{src_code}-en")),
|
| 62 |
pipeline("translation", model=AVAILABLE_MODELS.get(f"en-{tgt_code}"))
|
| 63 |
)
|
|
|
|
| 64 |
else:
|
| 65 |
raise ValueError(f"No model available for {src_code} -> {tgt_code}")
|
| 66 |
|
|
|
|
| 73 |
try:
|
| 74 |
if file.filename.endswith(".txt"):
|
| 75 |
return file.file.read().decode("utf-8")
|
|
|
|
| 76 |
elif file.filename.endswith(".pdf"):
|
| 77 |
doc = fitz.open(stream=file.file.read(), filetype="pdf")
|
| 78 |
return "\n".join([page.get_text() for page in doc])
|
|
|
|
| 79 |
elif file.filename.endswith(".docx"):
|
| 80 |
doc = Document(file.file)
|
| 81 |
return "\n".join([para.text for para in doc.paragraphs])
|
|
|
|
| 82 |
elif file.filename.endswith(".xlsx"):
|
| 83 |
wb = openpyxl.load_workbook(file.file)
|
| 84 |
text = ""
|
|
|
|
| 87 |
for row in ws.iter_rows():
|
| 88 |
text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
|
| 89 |
return text
|
|
|
|
| 90 |
elif file.filename.endswith(".pptx"):
|
| 91 |
prs = Presentation(file.file)
|
| 92 |
text = ""
|
|
|
|
| 95 |
if hasattr(shape, "text"):
|
| 96 |
text += shape.text + "\n"
|
| 97 |
return text
|
|
|
|
| 98 |
else:
|
| 99 |
raise HTTPException(status_code=400, detail="File type not supported.")
|
|
|
|
| 100 |
except Exception as e:
|
| 101 |
raise HTTPException(status_code=500, detail=f"Error extracting text: {str(e)}")
|
| 102 |
|
|
|
|
| 118 |
raise HTTPException(status_code=400, detail=f"Unsupported language: {src_lang} -> {tgt_lang}")
|
| 119 |
|
| 120 |
try:
|
|
|
|
| 121 |
translator = load_translator(src_code, tgt_code)
|
|
|
|
|
|
|
| 122 |
if isinstance(translator, tuple):
|
| 123 |
translator1, translator2 = translator
|
| 124 |
intermediate_text = "\n".join([translator1(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
|
| 125 |
translated_text = "\n".join([translator2(chunk)[0]['translation_text'] for chunk in chunk_text(intermediate_text)])
|
|
|
|
| 126 |
else:
|
| 127 |
translated_text = "\n".join([translator(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
|
|
|
|
| 128 |
return {"translated_text": translated_text}
|
|
|
|
| 129 |
except Exception as e:
|
| 130 |
raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
|