Spaces:
Sleeping
Sleeping
| from fastapi.staticfiles import StaticFiles | |
| import requests, re, warnings | |
| from dotenv import load_dotenv | |
| from fastapi import FastAPI, Request, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import FileResponse, StreamingResponse | |
| from bs4 import BeautifulSoup | |
| import httpx | |
| from huggingface_hub.utils import set_client_factory | |
| from schemas import * | |
| from classes import * | |
def hf_client_factory() -> httpx.Client:
    """Build the httpx client that huggingface_hub will use for all requests.

    NOTE(review): verify=False disables TLS certificate checking — presumably a
    workaround for a corporate proxy / self-signed cert; confirm it is intended.
    """
    client = httpx.Client(verify=False)
    return client
# Route all huggingface_hub HTTP traffic through the custom (non-verifying) client.
set_client_factory(hf_client_factory)
# Silence all warnings — notably urllib3's InsecureRequestWarning triggered by
# the verify=False requests below.
warnings.filterwarnings("ignore")
# Load environment variables from a local .env file, if present.
load_dotenv()
# Maps a 3GPP TSG category to its FTP folder names on www.3gpp.org/ftp:
# index 0 is the plenary (TSG) folder, index N (1..6) is working group N's
# folder. Indexed via get_folder_name(), which returns (category, index).
meetings_mapping = {
    "SA": [
        "TSG_SA",
        "WG1_Serv",
        "WG2_Arch",
        "WG3_Security",
        "WG4_CODEC",
        "WG5_TM",
        "WG6_MissionCritical"
    ],
    "CT": [
        "TSG_CT",
        "WG1_mm-cc-sm_ex-CN1",
        "WG2_capability_ex-T2",
        "WG3_interworking_ex-CN3",
        "WG4_protocollars_ex-CN4",
        "WG5_osa_ex-CN5",
        "WG6_Smartcard_Ex-T3"
    ],
    "RAN": [
        "TSG_RAN",
        "WG1_RL1",
        "WG2_RL2",
        "WG3_Iu",
        "WG4_Radio",
        "WG5_Test_ex-T1",
        "WG6_legacyRAN"
    ]
}
import threading

# Lazily-created singleton indexer instances; built on first use by the
# get_*_indexer() helpers below.
_tdoc_indexer = None
_spec_3gpp_indexer = None
_spec_etsi_indexer = None
# Guards the one-time construction of each singleton above (double-checked
# locking in the getters).
_init_locks = {
    "tdoc": threading.Lock(),
    "3gpp": threading.Lock(),
    "etsi": threading.Lock(),
}
# Ensures at most one indexing run per indexer at a time: the endpoints
# acquire non-blocking (409 on contention) and release when the SSE stream
# finishes.
_indexing_locks = {
    "tdoc": threading.Lock(),
    "3gpp": threading.Lock(),
    "etsi": threading.Lock(),
}
def get_tdoc_indexer():
    """Return the process-wide TDocIndexer, creating it lazily and thread-safely."""
    global _tdoc_indexer
    # Fast path: already built, no locking needed.
    if _tdoc_indexer is not None:
        return _tdoc_indexer
    with _init_locks["tdoc"]:
        # Re-check under the lock: another thread may have built it meanwhile.
        if _tdoc_indexer is None:
            _tdoc_indexer = TDocIndexer()
    return _tdoc_indexer
def get_spec_3gpp_indexer():
    """Return the process-wide Spec3GPPIndexer, creating it lazily and thread-safely."""
    global _spec_3gpp_indexer
    # Fast path: already built, no locking needed.
    if _spec_3gpp_indexer is not None:
        return _spec_3gpp_indexer
    with _init_locks["3gpp"]:
        # Re-check under the lock: another thread may have built it meanwhile.
        if _spec_3gpp_indexer is None:
            _spec_3gpp_indexer = Spec3GPPIndexer()
    return _spec_3gpp_indexer
def get_spec_etsi_indexer():
    """Return the process-wide SpecETSIIndexer, creating it lazily and thread-safely."""
    global _spec_etsi_indexer
    # Fast path: already built, no locking needed.
    if _spec_etsi_indexer is not None:
        return _spec_etsi_indexer
    with _init_locks["etsi"]:
        # Re-check under the lock: another thread may have built it meanwhile.
        if _spec_etsi_indexer is None:
            _spec_etsi_indexer = SpecETSIIndexer()
    return _spec_etsi_indexer
app = FastAPI()
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests — confirm the intended origins.
app.add_middleware(CORSMiddleware, allow_credentials=True, allow_headers=["*"], allow_origins=["*"])
# Serve front-end assets from ./static under /static.
app.mount("/static", StaticFiles(directory="static"), name="static")
def main():
    """Serve the single-page front end (index.html).

    NOTE(review): no route decorator is visible on this or the endpoint
    functions below — presumably the @app.* decorators were lost in
    extraction; confirm against the deployed source.
    """
    return FileResponse("index.html")
def get_folder_name(working_group: str):
    """Translate a working-group label into a (category, index) pair.

    A TSG plenary label ("SP", "CP", "RP") maps to index 0 of its category;
    a numbered label such as "SA2" or "RAN1" maps to (category, number),
    where the number indexes that category's working-group folder list.

    Args:
        working_group: label like "SP", "SA2", "CT4", "RAN1".

    Returns:
        Tuple of (category code, working-group index).

    Raises:
        ValueError: if the label matches neither form.
    """
    # Plenary (TSG) groups end with "P": SP -> SA, CP -> CT, RP -> RAN.
    if working_group.endswith("P"):
        if working_group.startswith("S"):
            return ("SA", 0)
        if working_group.startswith("C"):
            return ("CT", 0)
        if working_group.startswith("R"):
            return ("RAN", 0)
    # fullmatch (was: match) so trailing garbage like "SA2x" is rejected
    # instead of being silently accepted as "SA2".
    m = re.fullmatch(r"([A-Z]+)(\d+)", working_group)
    if not m:
        raise ValueError(f"Unexpected working group format: {working_group!r}")
    code, num = m.groups()
    return (code, int(num))
def get_meetings(working_group: str):
    """List the meeting folder names of a working group from the 3GPP FTP site.

    Scrapes the directory listing and keeps only entries whose text starts
    with "TSG" or "CT". Returns {"url": <listing url>, "meetings": [...]}.
    """
    category, wg_number = get_folder_name(working_group)
    wg_folder = meetings_mapping[category][wg_number]
    url = f"https://www.3gpp.org/ftp/{meetings_mapping[category][0]}/{wg_folder}"
    # NOTE(review): verify=False disables TLS certificate checking — confirm intended.
    page = requests.get(url, verify=False, timeout=(10, 30))
    soup = BeautifulSoup(page.text, "html.parser")
    link_texts = (anchor.get_text() for anchor in soup.select("tr td a"))
    meetings = [text for text in link_texts if text.startswith(("TSG", "CT"))]
    return {"url": url, "meetings": meetings}
def index_tdocs_wg_progress(req: IndexTDoc):
    """Stream SSE progress while indexing all TDocs of one working group.

    Returns 400 when the working group is missing and 409 when a TDoc
    indexing run is already in progress. The indexing lock is held for the
    whole stream and released when the generator finishes or is closed.
    """
    if not req.wg:
        raise HTTPException(status_code=400, detail="Working Group not defined !")
    if not _indexing_locks["tdoc"].acquire(blocking=False):
        raise HTTPException(status_code=409, detail="TDoc indexing already in progress")
    try:
        category, wg_number = get_folder_name(req.wg)
        folder = meetings_mapping[category][wg_number]
        url = f"https://www.3gpp.org/ftp/{meetings_mapping[category][0]}"
        indexer = get_tdoc_indexer()
    except BaseException:
        # Bug fix: any exception raised after acquire() (e.g. a bad WG name in
        # get_folder_name) used to leak the lock, making every later indexing
        # request fail with 409 forever.
        _indexing_locks["tdoc"].release()
        raise

    def generate_events():
        try:
            yield f"event: info\ndata: {req.wg}\n\n"
            for content in indexer.process_workgroup(folder, url):
                yield content
            indexer.save_indexer()
            yield "event: end\ndata: Indexation ended successfully !\n\n"
        finally:
            # Released here once the SSE stream ends (normally or on disconnect).
            _indexing_locks["tdoc"].release()
    return StreamingResponse(generate_events(), media_type="text/event-stream")
def index_tdocs_meeting_progress(req: IndexTDoc):
    """Stream SSE progress while indexing selected meetings of one working group.

    Returns 400 when the working group or meeting list is missing and 409 when
    a TDoc indexing run is already in progress. The indexing lock is held for
    the whole stream and released when the generator finishes or is closed.
    """
    if not req.wg:
        raise HTTPException(status_code=400, detail="Working Group not defined !")
    if not req.meetings:
        raise HTTPException(status_code=400, detail="Meetings not defined !")
    if not _indexing_locks["tdoc"].acquire(blocking=False):
        raise HTTPException(status_code=409, detail="TDoc indexing already in progress")
    try:
        category, wg_number = get_folder_name(req.wg)
        folder = meetings_mapping[category][wg_number]
        url = f"https://www.3gpp.org/ftp/{meetings_mapping[category][0]}/{folder}"
        indexer = get_tdoc_indexer()
    except BaseException:
        # Bug fix: any exception raised after acquire() (e.g. a bad WG name in
        # get_folder_name) used to leak the lock, making every later indexing
        # request fail with 409 forever.
        _indexing_locks["tdoc"].release()
        raise

    def generate_events():
        try:
            yield f"event: get-maximum\ndata: {len(req.meetings)}\n\n"
            for i, meet in enumerate(req.meetings):
                yield f"event: info\ndata: {req.wg}-{meet}\n\n"
                indexer.process_meeting(meet, url)
                yield f"event: progress\ndata: {i+1}\n\n"
            indexer.save_indexer()
            yield "event: end\ndata: Indexation ended successfully !\n\n"
        finally:
            # Released here once the SSE stream ends (normally or on disconnect).
            _indexing_locks["tdoc"].release()
    return StreamingResponse(generate_events(), media_type="text/event-stream")
def index_all_tdocs_progress():
    """Stream SSE progress while indexing TDocs of every working group.

    Returns 409 when a TDoc indexing run is already in progress. The indexing
    lock is held for the whole stream and released when the generator finishes
    or is closed.
    """
    if not _indexing_locks["tdoc"].acquire(blocking=False):
        raise HTTPException(status_code=409, detail="TDoc indexing already in progress")
    try:
        indexer = get_tdoc_indexer()
    except BaseException:
        # Bug fix: a failure while building the indexer used to leak the lock,
        # making every later indexing request fail with 409 forever.
        _indexing_locks["tdoc"].release()
        raise

    def generate_events():
        try:
            for content in indexer.index_all_tdocs():
                yield content
            indexer.save_indexer()
            yield "event: end\ndata: Indexation ended successfully !\n\n"
        finally:
            # Released here once the SSE stream ends (normally or on disconnect).
            _indexing_locks["tdoc"].release()
    return StreamingResponse(generate_events(), media_type="text/event-stream")
def index_3gpp_specs_progress():
    """Stream SSE progress while (re)building the 3GPP specification index.

    Returns 409 when a 3GPP spec indexing run is already in progress. The
    indexing lock is held for the whole stream and released when the generator
    finishes or is closed.
    """
    if not _indexing_locks["3gpp"].acquire(blocking=False):
        raise HTTPException(status_code=409, detail="3GPP spec indexing already in progress")
    try:
        indexer = get_spec_3gpp_indexer()
    except BaseException:
        # Bug fix: a failure while building the indexer used to leak the lock,
        # making every later indexing request fail with 409 forever.
        _indexing_locks["3gpp"].release()
        raise

    def generate_events():
        try:
            for content in indexer.run():
                yield content
            yield "event: info\ndata: Saving index ...\n\n"
            yield "event: get-maximum\ndata: 1\n\n"
            yield "event: progress\ndata: 1\n\n"
            indexer.save()
            yield "event: info\ndata: Creating BM25 models ...\n\n"
            yield "event: get-maximum\ndata: 1\n\n"
            yield "event: progress\ndata: 1\n\n"
            indexer.create_bm25_index()
            yield "event: end\ndata: Indexation ended successfully !\n\n"
        finally:
            # Released here once the SSE stream ends (normally or on disconnect).
            _indexing_locks["3gpp"].release()
    return StreamingResponse(generate_events(), media_type="text/event-stream")
def index_etsi_specs_progress():
    """Stream SSE progress while (re)building the ETSI specification index.

    Returns 409 when an ETSI spec indexing run is already in progress. The
    indexing lock is held for the whole stream and released when the generator
    finishes or is closed.
    """
    if not _indexing_locks["etsi"].acquire(blocking=False):
        raise HTTPException(status_code=409, detail="ETSI spec indexing already in progress")
    try:
        indexer = get_spec_etsi_indexer()
    except BaseException:
        # Bug fix: a failure while building the indexer used to leak the lock,
        # making every later indexing request fail with 409 forever.
        _indexing_locks["etsi"].release()
        raise

    def generate_events():
        try:
            for content in indexer.run():
                yield content
            yield "event: info\ndata: Saving index ...\n\n"
            yield "event: get-maximum\ndata: 1\n\n"
            yield "event: progress\ndata: 1\n\n"
            indexer.save()
            yield "event: info\ndata: Creating BM25 models ...\n\n"
            yield "event: get-maximum\ndata: 1\n\n"
            yield "event: progress\ndata: 1\n\n"
            indexer.create_bm25_index()
            yield "event: end\ndata: Indexation ended successfully !\n\n"
        finally:
            # Released here once the SSE stream ends (normally or on disconnect).
            _indexing_locks["etsi"].release()
    return StreamingResponse(generate_events(), media_type="text/event-stream")