"""FastAPI service for browsing 3GPP meetings and running document/spec indexers.

The indexing endpoints stream their progress to the client as Server-Sent
Events (``text/event-stream``). Each family of indexer (TDoc, 3GPP specs,
ETSI specs) is guarded by a non-blocking lock so that only one run per family
is active at a time; a concurrent request is answered with HTTP 409.
"""

import re
import threading
import warnings
from typing import Callable, Iterable, Iterator, Tuple

import httpx
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from huggingface_hub.utils import set_client_factory

# Star imports are kept last and in their original relative order so that any
# name they (re)define still wins exactly as before.
from schemas import *  # noqa: F401,F403
from classes import *  # noqa: F401,F403


def hf_client_factory() -> httpx.Client:
    """Build the HTTP client handed to ``huggingface_hub``.

    SECURITY NOTE: TLS certificate verification is disabled here
    (``verify=False``). Left unchanged on purpose; confirm it is required by
    the deployment environment before relying on it.
    """
    return httpx.Client(verify=False)


set_client_factory(hf_client_factory)

# Silences every warning category process-wide.
# NOTE(review): presumably meant to hide the insecure-request warnings caused
# by verify=False — confirm, since it also hides unrelated warnings.
warnings.filterwarnings("ignore")
load_dotenv()

# Category -> folder names; index 0 is the TSG (plenary) folder and index N is
# the folder used for working group number N of that category.
meetings_mapping = {
    "SA": [
        "TSG_SA",
        "WG1_Serv",
        "WG2_Arch",
        "WG3_Security",
        "WG4_CODEC",
        "WG5_TM",
        "WG6_MissionCritical",
    ],
    "CT": [
        "TSG_CT",
        "WG1_mm-cc-sm_ex-CN1",
        "WG2_capability_ex-T2",
        "WG3_interworking_ex-CN3",
        "WG4_protocollars_ex-CN4",
        "WG5_osa_ex-CN5",
        "WG6_Smartcard_Ex-T3",
    ],
    "RAN": [
        "TSG_RAN",
        "WG1_RL1",
        "WG2_RL2",
        "WG3_Iu",
        "WG4_Radio",
        "WG5_Test_ex-T1",
        "WG6_legacyRAN",
    ],
}

# First-letter -> category lookup used for identifiers ending in "P".
_PLENARY_PREFIXES = (("S", "SA"), ("C", "CT"), ("R", "RAN"))

# Lazily created indexer singletons (see the get_*_indexer functions).
_tdoc_indexer = None
_spec_3gpp_indexer = None
_spec_etsi_indexer = None

# Guard the one-time construction of each singleton.
_init_locks = {
    "tdoc": threading.Lock(),
    "3gpp": threading.Lock(),
    "etsi": threading.Lock(),
}

# Held for the whole duration of an indexing run; acquired without blocking so
# a second request can be rejected instead of queued.
_indexing_locks = {
    "tdoc": threading.Lock(),
    "3gpp": threading.Lock(),
    "etsi": threading.Lock(),
}


def get_tdoc_indexer():
    """Return the shared ``TDocIndexer``, creating it on first use."""
    global _tdoc_indexer
    if _tdoc_indexer is None:
        with _init_locks["tdoc"]:
            # Re-check under the lock: another thread may have created it
            # between the first check and the lock acquisition.
            if _tdoc_indexer is None:
                _tdoc_indexer = TDocIndexer()
    return _tdoc_indexer


def get_spec_3gpp_indexer():
    """Return the shared ``Spec3GPPIndexer``, creating it on first use."""
    global _spec_3gpp_indexer
    if _spec_3gpp_indexer is None:
        with _init_locks["3gpp"]:
            # Re-check under the lock (see get_tdoc_indexer).
            if _spec_3gpp_indexer is None:
                _spec_3gpp_indexer = Spec3GPPIndexer()
    return _spec_3gpp_indexer


def get_spec_etsi_indexer():
    """Return the shared ``SpecETSIIndexer``, creating it on first use."""
    global _spec_etsi_indexer
    if _spec_etsi_indexer is None:
        with _init_locks["etsi"]:
            # Re-check under the lock (see get_tdoc_indexer).
            if _spec_etsi_indexer is None:
                _spec_etsi_indexer = SpecETSIIndexer()
    return _spec_etsi_indexer


app = FastAPI()

# SECURITY NOTE: every origin is allowed together with credentials. Left
# unchanged; restrict allow_origins if the API is exposed beyond trusted hosts.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_headers=["*"],
    allow_origins=["*"],
)

app.mount("/static", StaticFiles(directory="static"), name="static")


@app.get('/')
def main():
    """Serve the ``index.html`` page."""
    return FileResponse("index.html")


def get_folder_name(working_group: str) -> Tuple[str, int]:
    """Split a working-group identifier into ``(category, group_number)``.

    Identifiers ending in ``"P"`` whose first letter is ``S``, ``C`` or ``R``
    map to group number 0 of ``SA``, ``CT`` or ``RAN`` respectively. Any other
    identifier must start with upper-case letters followed by digits, e.g.
    ``"RAN2"`` -> ``("RAN", 2)``.

    Raises:
        ValueError: if the identifier matches neither form.
    """
    if working_group.endswith("P"):
        for prefix, category in _PLENARY_PREFIXES:
            if working_group.startswith(prefix):
                return (category, 0)

    match = re.match(r"([A-Z]+)(\d+)", working_group)
    if match is None:
        raise ValueError("Unattended format")
    code, num = match.groups()
    return (code, int(num))


def _resolve_meeting_folder(working_group: str) -> Tuple[str, str]:
    """Return ``(tsg_folder, group_folder)`` for a working-group identifier.

    Raises:
        HTTPException: 400 when the identifier cannot be parsed or does not
            correspond to an entry of ``meetings_mapping``.
    """
    try:
        category, wg_number = get_folder_name(working_group)
        folders = meetings_mapping[category]
        return folders[0], folders[wg_number]
    except (ValueError, KeyError, IndexError) as err:
        raise HTTPException(
            status_code=400,
            detail=f"Unknown working group: {working_group}",
        ) from err


def _locked_event_stream(
    lock_key: str,
    busy_detail: str,
    make_events: Callable[[], Iterable[str]],
) -> StreamingResponse:
    """Stream ``make_events()`` as SSE while holding ``_indexing_locks[lock_key]``.

    The lock is taken without blocking. It is released when the stream ends
    (normally or through an exception) and also when ``make_events`` itself
    fails, so a setup error cannot leave the lock held forever.

    Raises:
        HTTPException: 409 with ``busy_detail`` when the lock is already held.
    """
    lock = _indexing_locks[lock_key]
    if not lock.acquire(blocking=False):
        raise HTTPException(status_code=409, detail=busy_detail)

    try:
        events = make_events()
    except BaseException:
        lock.release()
        raise

    def guarded() -> Iterator[str]:
        # NOTE(review): a generator's ``finally`` only runs once iteration has
        # started; confirm the server always begins iterating the body.
        try:
            yield from events
        finally:
            lock.release()

    return StreamingResponse(guarded(), media_type="text/event-stream")


def _tdoc_workgroup_events(indexer, wg: str, folder: str, url: str) -> Iterator[str]:
    """Yield SSE messages while indexing one whole working group."""
    yield f"event: info\ndata: {wg}\n\n"
    yield from indexer.process_workgroup(folder, url)
    indexer.save_indexer()
    yield "event: end\ndata: Indexation ended successfully !\n\n"


def _tdoc_meeting_events(indexer, wg: str, meetings, url: str) -> Iterator[str]:
    """Yield SSE messages while indexing the given meetings one by one."""
    yield f"event: get-maximum\ndata: {len(meetings)}\n\n"
    for done, meet in enumerate(meetings, start=1):
        yield f"event: info\ndata: {wg}-{meet}\n\n"
        indexer.process_meeting(meet, url)
        yield f"event: progress\ndata: {done}\n\n"
    indexer.save_indexer()
    yield "event: end\ndata: Indexation ended successfully !\n\n"


def _tdoc_all_events(indexer) -> Iterator[str]:
    """Yield SSE messages while indexing every TDoc."""
    yield from indexer.index_all_tdocs()
    indexer.save_indexer()
    yield "event: end\ndata: Indexation ended successfully !\n\n"


def _spec_index_events(indexer) -> Iterator[str]:
    """Yield SSE messages for a spec indexer: run, save, then build BM25.

    The event order of the original endpoints is preserved: each ``progress``
    message is emitted just before the step it announces is executed.
    """
    yield from indexer.run()

    yield "event: info\ndata: Saving index ...\n\n"
    yield "event: get-maximum\ndata: 1\n\n"
    yield "event: progress\ndata: 1\n\n"
    indexer.save()

    yield "event: info\ndata: Creating BM25 models ...\n\n"
    yield "event: get-maximum\ndata: 1\n\n"
    yield "event: progress\ndata: 1\n\n"
    indexer.create_bm25_index()

    yield "event: end\ndata: Indexation ended successfully !\n\n"


@app.get("/get_meetings/{working_group}")
def get_meetings(working_group: str):
    """List the meeting folders published for a working group.

    Returns a dict with the listing ``url`` and the ``meetings`` names, i.e.
    the text of every table link starting with ``"TSG"`` or ``"CT"``.

    Raises:
        HTTPException: 400 for an unknown working group, 502 when the remote
            listing cannot be fetched.
    """
    root_folder, folder = _resolve_meeting_folder(working_group)
    url = f"https://www.3gpp.org/ftp/{root_folder}/{folder}"

    try:
        # SECURITY NOTE: certificate verification is disabled (verify=False).
        response = requests.get(url, verify=False, timeout=(10, 30))
        # Without this an error page would be parsed and silently produce an
        # empty meeting list.
        response.raise_for_status()
    except requests.RequestException as err:
        raise HTTPException(
            status_code=502,
            detail=f"Could not fetch meeting list from {url}",
        ) from err

    soup = BeautifulSoup(response.text, "html.parser")
    link_texts = (item.get_text() for item in soup.select("tr td a"))
    meetings = [text for text in link_texts if text.startswith(("TSG", "CT"))]
    return {"url": url, "meetings": meetings}


@app.post("/index_tdocs/working_group")
def index_tdocs_wg_progress(req: IndexTDoc):
    """Index all TDocs of ``req.wg`` and stream the progress as SSE.

    Raises:
        HTTPException: 400 when the working group is missing or unknown,
            409 when a TDoc indexing run is already active.
    """
    if not req.wg:
        raise HTTPException(status_code=400, detail="Working Group not defined !")

    # Validate before taking the lock so a bad request cannot leave it held.
    root_folder, folder = _resolve_meeting_folder(req.wg)
    url = f"https://www.3gpp.org/ftp/{root_folder}"

    return _locked_event_stream(
        "tdoc",
        "TDoc indexing already in progress",
        lambda: _tdoc_workgroup_events(get_tdoc_indexer(), req.wg, folder, url),
    )


@app.post("/index_tdocs/meeting")
def index_tdocs_meeting_progress(req: IndexTDoc):
    """Index the TDocs of the meetings in ``req.meetings`` and stream progress.

    Raises:
        HTTPException: 400 when the working group or the meetings are missing
            or the working group is unknown, 409 when a TDoc indexing run is
            already active.
    """
    if not req.wg:
        raise HTTPException(status_code=400, detail="Working Group not defined !")
    if not req.meetings:
        raise HTTPException(status_code=400, detail="Meetings not defined !")

    # Validate before taking the lock so a bad request cannot leave it held.
    root_folder, folder = _resolve_meeting_folder(req.wg)
    url = f"https://www.3gpp.org/ftp/{root_folder}/{folder}"

    return _locked_event_stream(
        "tdoc",
        "TDoc indexing already in progress",
        lambda: _tdoc_meeting_events(get_tdoc_indexer(), req.wg, req.meetings, url),
    )


@app.post("/index_tdocs/all")
def index_all_tdocs_progress():
    """Index every TDoc and stream the progress as SSE.

    Raises:
        HTTPException: 409 when a TDoc indexing run is already active.
    """
    return _locked_event_stream(
        "tdoc",
        "TDoc indexing already in progress",
        lambda: _tdoc_all_events(get_tdoc_indexer()),
    )


@app.post("/index_specs/3gpp")
def index_3gpp_specs_progress():
    """Index the 3GPP specifications and stream the progress as SSE.

    Raises:
        HTTPException: 409 when a 3GPP spec indexing run is already active.
    """
    return _locked_event_stream(
        "3gpp",
        "3GPP spec indexing already in progress",
        lambda: _spec_index_events(get_spec_3gpp_indexer()),
    )


@app.post("/index_specs/etsi")
def index_etsi_specs_progress():
    """Index the ETSI specifications and stream the progress as SSE.

    Raises:
        HTTPException: 409 when an ETSI spec indexing run is already active.
    """
    return _locked_event_stream(
        "etsi",
        "ETSI spec indexing already in progress",
        lambda: _spec_index_events(get_spec_etsi_indexer()),
    )