rayhane123 committed on
Commit
74b40ea
·
verified ·
1 Parent(s): ca4455d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +3 -17
main.py CHANGED
@@ -13,14 +13,14 @@ import os
13
  # Initialize FastAPI app
14
  app = FastAPI()
15
 
16
- # Set the correct path for static files
17
- STATIC_DIR = r"C:\Users\User\doc_translation_service\translation\static"
18
 
19
  # Ensure the static directory exists
20
  if not os.path.exists(STATIC_DIR):
21
  os.makedirs(STATIC_DIR)
22
 
23
- app.mount("/", StaticFiles(directory="static", html=True), name="static")
24
 
25
  @app.get("/", response_class=HTMLResponse)
26
  async def read_root():
@@ -56,13 +56,11 @@ def load_translator(src_code: str, tgt_code: str):
56
 
57
  if model_key in AVAILABLE_MODELS:
58
  return pipeline("translation", model=AVAILABLE_MODELS[model_key])
59
-
60
  elif src_code != "en" and tgt_code != "en":
61
  return (
62
  pipeline("translation", model=AVAILABLE_MODELS.get(f"{src_code}-en")),
63
  pipeline("translation", model=AVAILABLE_MODELS.get(f"en-{tgt_code}"))
64
  )
65
-
66
  else:
67
  raise ValueError(f"No model available for {src_code} -> {tgt_code}")
68
 
@@ -75,15 +73,12 @@ def extract_text(file: UploadFile):
75
  try:
76
  if file.filename.endswith(".txt"):
77
  return file.file.read().decode("utf-8")
78
-
79
  elif file.filename.endswith(".pdf"):
80
  doc = fitz.open(stream=file.file.read(), filetype="pdf")
81
  return "\n".join([page.get_text() for page in doc])
82
-
83
  elif file.filename.endswith(".docx"):
84
  doc = Document(file.file)
85
  return "\n".join([para.text for para in doc.paragraphs])
86
-
87
  elif file.filename.endswith(".xlsx"):
88
  wb = openpyxl.load_workbook(file.file)
89
  text = ""
@@ -92,7 +87,6 @@ def extract_text(file: UploadFile):
92
  for row in ws.iter_rows():
93
  text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
94
  return text
95
-
96
  elif file.filename.endswith(".pptx"):
97
  prs = Presentation(file.file)
98
  text = ""
@@ -101,10 +95,8 @@ def extract_text(file: UploadFile):
101
  if hasattr(shape, "text"):
102
  text += shape.text + "\n"
103
  return text
104
-
105
  else:
106
  raise HTTPException(status_code=400, detail="File type not supported.")
107
-
108
  except Exception as e:
109
  raise HTTPException(status_code=500, detail=f"Error extracting text: {str(e)}")
110
 
@@ -126,19 +118,13 @@ async def upload_file(
126
  raise HTTPException(status_code=400, detail=f"Unsupported language: {src_lang} -> {tgt_lang}")
127
 
128
  try:
129
- # Load translation model
130
  translator = load_translator(src_code, tgt_code)
131
-
132
- # If indirect translation via English
133
  if isinstance(translator, tuple):
134
  translator1, translator2 = translator
135
  intermediate_text = "\n".join([translator1(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
136
  translated_text = "\n".join([translator2(chunk)[0]['translation_text'] for chunk in chunk_text(intermediate_text)])
137
-
138
  else:
139
  translated_text = "\n".join([translator(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
140
-
141
  return {"translated_text": translated_text}
142
-
143
  except Exception as e:
144
  raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
 
13
  # Initialize FastAPI app
14
  app = FastAPI()
15
 
16
+ # Set the correct path for static files (for Hugging Face Spaces)
17
+ STATIC_DIR = "static"
18
 
19
  # Ensure the static directory exists
20
  if not os.path.exists(STATIC_DIR):
21
  os.makedirs(STATIC_DIR)
22
 
23
+ app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
24
 
25
  @app.get("/", response_class=HTMLResponse)
26
  async def read_root():
 
56
 
57
  if model_key in AVAILABLE_MODELS:
58
  return pipeline("translation", model=AVAILABLE_MODELS[model_key])
 
59
  elif src_code != "en" and tgt_code != "en":
60
  return (
61
  pipeline("translation", model=AVAILABLE_MODELS.get(f"{src_code}-en")),
62
  pipeline("translation", model=AVAILABLE_MODELS.get(f"en-{tgt_code}"))
63
  )
 
64
  else:
65
  raise ValueError(f"No model available for {src_code} -> {tgt_code}")
66
 
 
73
  try:
74
  if file.filename.endswith(".txt"):
75
  return file.file.read().decode("utf-8")
 
76
  elif file.filename.endswith(".pdf"):
77
  doc = fitz.open(stream=file.file.read(), filetype="pdf")
78
  return "\n".join([page.get_text() for page in doc])
 
79
  elif file.filename.endswith(".docx"):
80
  doc = Document(file.file)
81
  return "\n".join([para.text for para in doc.paragraphs])
 
82
  elif file.filename.endswith(".xlsx"):
83
  wb = openpyxl.load_workbook(file.file)
84
  text = ""
 
87
  for row in ws.iter_rows():
88
  text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
89
  return text
 
90
  elif file.filename.endswith(".pptx"):
91
  prs = Presentation(file.file)
92
  text = ""
 
95
  if hasattr(shape, "text"):
96
  text += shape.text + "\n"
97
  return text
 
98
  else:
99
  raise HTTPException(status_code=400, detail="File type not supported.")
 
100
  except Exception as e:
101
  raise HTTPException(status_code=500, detail=f"Error extracting text: {str(e)}")
102
 
 
118
  raise HTTPException(status_code=400, detail=f"Unsupported language: {src_lang} -> {tgt_lang}")
119
 
120
  try:
 
121
  translator = load_translator(src_code, tgt_code)
 
 
122
  if isinstance(translator, tuple):
123
  translator1, translator2 = translator
124
  intermediate_text = "\n".join([translator1(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
125
  translated_text = "\n".join([translator2(chunk)[0]['translation_text'] for chunk in chunk_text(intermediate_text)])
 
126
  else:
127
  translated_text = "\n".join([translator(chunk)[0]['translation_text'] for chunk in chunk_text(text)])
 
128
  return {"translated_text": translated_text}
 
129
  except Exception as e:
130
  raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")