Spaces:
Sleeping
Sleeping
Lucas ARRIESSE
committed on
Commit
·
dc794b1
1
Parent(s):
4633840
Hotfix : change max DL limit 120 -> 30 and limi
Browse files- api/docs.py +21 -18
api/docs.py
CHANGED
|
@@ -106,35 +106,38 @@ async def convert_file(contents: io.BytesIO, filename: str, input_ext: str, outp
|
|
| 106 |
|
| 107 |
|
| 108 |
# Rate limit of FTP downloads per minute
|
| 109 |
-
FTP_DOWNLOAD_RATE_LIMITER = AsyncLimiter(max_rate=
|
|
|
|
|
|
|
| 110 |
|
| 111 |
|
| 112 |
async def get_doc_archive(url: str, client: AsyncClient) -> tuple[str, str, io.BytesIO]:
|
| 113 |
"""Récupère le docx depuis l'URL et le retourne un tuple (nom, extension, contenu)"""
|
| 114 |
|
| 115 |
async with FTP_DOWNLOAD_RATE_LIMITER:
|
| 116 |
-
|
| 117 |
-
|
|
|
|
| 118 |
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
|
| 124 |
-
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
|
| 137 |
-
|
| 138 |
|
| 139 |
|
| 140 |
def apply_docx_revisions(docx_zip: zipfile.ZipFile) -> io.BytesIO:
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
# Rate limit of FTP downloads per minute
|
| 109 |
+
FTP_DOWNLOAD_RATE_LIMITER = AsyncLimiter(max_rate=60, time_period=60)
|
| 110 |
+
# Max number of parallel workers downloading
|
| 111 |
+
FTP_MAX_PARALLEL_WORKERS = asyncio.Semaphore(4)
|
| 112 |
|
| 113 |
|
| 114 |
async def get_doc_archive(url: str, client: AsyncClient) -> tuple[str, str, io.BytesIO]:
    """Download a ZIP archive and return the first file it contains.

    Args:
        url: Location of the archive; must end with ``zip``.
        client: Async HTTP client used to issue the GET request
            (presumably an ``httpx.AsyncClient`` -- confirm against the
            file's imports).

    Returns:
        A ``(name, extension, content)`` tuple: ``name`` is the archive
        entry's path without its extension, ``extension`` is the entry's
        extension lower-cased (leading dot included), and ``content`` is an
        in-memory buffer holding the entry's bytes.

    Raises:
        ValueError: If ``url`` does not end with ``zip``, or if the archive
            contains no file entry.

    Whatever ``resp.raise_for_status()`` raises for an unsuccessful response
    propagates to the caller unchanged.
    """
    # Validate before queueing: an invalid URL must not wait for, nor use up,
    # a rate-limiter token or a download worker slot.
    if not url.endswith("zip"):
        raise ValueError("URL doit pointer vers un fichier ZIP")

    # Same acquisition order as before: rate limiter first, then worker slot.
    async with FTP_DOWNLOAD_RATE_LIMITER, FTP_MAX_PARALLEL_WORKERS:
        resp = await client.get(url, headers={
            "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
        resp.raise_for_status()

    # The archive is fully in memory at this point, so it is unpacked after
    # the download slot has been released.
    with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
        # There should be a single document per archive: return the first
        # entry that is not a directory.
        for entry in zf.infolist():
            if entry.is_dir():
                continue

            root, ext = os.path.splitext(entry.filename)
            return (root, ext.lower(), io.BytesIO(zf.read(entry.filename)))

    raise ValueError("Aucun fichier trouvé dans l'archive")
|
| 141 |
|
| 142 |
|
| 143 |
def apply_docx_revisions(docx_zip: zipfile.ZipFile) -> io.BytesIO:
|