from fastapi import HTTPException
import requests
import re
from bs4 import BeautifulSoup
import os
import json

class ETSIDocFinder:
    def __init__(self):
        self.main_ftp_url = "https://docbox.etsi.org/SET"
        req_data = self.connect()
        print(req_data["message"])
        self.session = req_data["session"]

    def connect(self):
        # Authenticate against the ETSI EOL portal; credentials come from the
        # EOL_USER / EOL_PASSWORD environment variables.
        session = requests.Session()
        req = session.post(
            "https://portal.etsi.org/ETSIPages/LoginEOL.ashx",
            verify=False,
            headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"},
            data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}),
        )
        if req.text == "Failed":
            return {"error": True, "session": session, "message": "Login failed! Check your credentials."}
        return {"error": False, "session": session, "message": "Login successful"}

    def get_workgroup(self, doc: str):
        # Map the document prefix to its group folder on the docbox.
        if any(doc.startswith(kw) for kw in ["SETREQ", "SCPREQ"]):
            main_tsg = "SET-WG-R"
        elif any(doc.startswith(kw) for kw in ["SETTEC", "SCPTEC"]):
            main_tsg = "SET-WG-T"
        elif any(doc.startswith(kw) for kw in ["SET", "SCP"]):
            main_tsg = "SET"
        else:
            return None, None, None
        regex = re.search(r"\(([^)]+)\)", doc)
        if regex is None:  # no "(yy)" meeting-year marker in the ID
            return None, None, None
        # The two digits in parentheses are the meeting year: "(25)" -> "2025".
        workgroup = "20" + regex.group(1)
        return main_tsg, workgroup, doc
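
    # Illustrative only (the contribution ID is hypothetical): with doc =
    # "SETTEC(25)000042", the prefix "SETTEC" selects "SET-WG-T" and the
    # "(25)" marker yields workgroup "2025", so get_workgroup returns
    # ("SET-WG-T", "2025", "SETTEC(25)000042").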

    def find_workgroup_url(self, main_tsg, workgroup):
        # List the contributions folder and pick the subfolder whose name
        # contains the meeting year.
        response = self.session.get(f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS", verify=False)
        soup = BeautifulSoup(response.text, "html.parser")
        for item in soup.find_all("tr"):
            link = item.find("a")
            if link and workgroup in link.get_text():
                return f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{link.get_text()}"
        # Fall back to the bare workgroup name if no matching folder is listed.
        return f"{self.main_ftp_url}/{main_tsg}/05-CONTRIBUTIONS/{workgroup}"

    def get_docs_from_url(self, url):
        # Return the link texts (file names) of a docbox directory listing.
        try:
            response = self.session.get(url, verify=False, timeout=15)
            soup = BeautifulSoup(response.text, "html.parser")
            return [item.get_text() for item in soup.select("tr td a")]
        except Exception as e:
            print(f"Error accessing {url}: {e}")
            return []

    def search_document(self, doc_id: str):
        original = doc_id
        main_tsg, workgroup, doc = self.get_workgroup(doc_id)
        urls = []
        if main_tsg:
            wg_url = self.find_workgroup_url(main_tsg, workgroup)
            print(wg_url)
            if wg_url:
                files = self.get_docs_from_url(wg_url)
                print(files)
                for f in files:
                    # Compare case-insensitively: directory listings do not
                    # always match the case of the requested ID.
                    if doc.lower() in f.lower() or original in f:
                        print(f)
                        doc_url = f"{wg_url}/{f}"
                        urls.append(doc_url)
        # One match: return it; several: return the next-to-last entry;
        # none: report the document as not found.
        return urls[0] if len(urls) == 1 else urls[-2] if len(urls) > 1 else f"Document {doc_id} not found"
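
# Minimal usage sketch for ETSIDocFinder, assuming EOL_USER and EOL_PASSWORD
# hold valid ETSI portal credentials (the contribution ID is hypothetical):
#
#     finder = ETSIDocFinder()
#     url = finder.search_document("SET(25)000001")
#     print(url)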


class ETSISpecFinder:
    def __init__(self):
        self.main_url = "https://www.etsi.org/deliver/etsi_ts"
        self.second_url = "https://www.etsi.org/deliver/etsi_tr"
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}

    def get_spec_path(self, doc_id: str):
        # Build the deliverable path, e.g. "103 666-2" -> "103600_103699/10366602".
        if "-" in doc_id:
            position, part = doc_id.split("-")
        else:
            position, part = doc_id, None
        position = position.replace(" ", "")
        if part:
            part = part.zfill(2)  # pad single-digit part numbers: "2" -> "02"
        spec_folder = position + part if part is not None else position
        # Deliverables are grouped in blocks of 100 spec numbers.
        low = int(position) - (int(position) % 100)
        return f"{low}_{low + 99}/{spec_folder}"

    def get_docs_from_url(self, url):
        # Return the link texts of a deliverables listing, skipping the first
        # link (the parent-directory entry).
        try:
            response = requests.get(url, verify=False, timeout=15, headers=self.headers)
            soup = BeautifulSoup(response.text, "html.parser")
            docs = [item.get_text() for item in soup.find_all("a")][1:]
            return docs
        except Exception as e:
            print(f"Error accessing {url}: {e}")
            return []

    def search_document(self, doc_id: str):
        # Example: "103 666" or "103 666-2"
        original = doc_id
        url = f"{self.main_url}/{self.get_spec_path(original)}/"
        url2 = f"{self.second_url}/{self.get_spec_path(original)}/"
        print(url)
        print(url2)
        # Try the Technical Specification (etsi_ts) tree first: take the
        # latest release folder and return the first PDF in it.
        releases = self.get_docs_from_url(url)
        if releases:
            files = self.get_docs_from_url(url + releases[-1])
            for f in files:
                if f.endswith(".pdf"):
                    return url + releases[-1] + "/" + f
        # Fall back to the Technical Report (etsi_tr) tree.
        releases = self.get_docs_from_url(url2)
        if releases:
            files = self.get_docs_from_url(url2 + releases[-1])
            for f in files:
                if f.endswith(".pdf"):
                    return url2 + releases[-1] + "/" + f
        return f"Specification {doc_id} not found"