import os, re, string, json, tempfile, uuid
import html
import inspect
import importlib.resources as importlib_resources
from collections import defaultdict

import gradio as gr
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForTokenClassification

# ----------------------------
# Optional: FO-Tokenizer (fotokenizer) for sentence splitting
# ----------------------------
_HAS_FOTOKENIZER = False
try:
    import fotokenizer  # noqa: F401
    from fotokenizer import tokenize as fo_tokenize
    from fotokenizer import TOK as FO_TOK
    import fotokenizer.abbrev as fo_abbrev
    _HAS_FOTOKENIZER = True
except Exception:
    _HAS_FOTOKENIZER = False


def _patch_fotokenizer_for_py313() -> None:
    """FO-Tokenizer currently uses importlib.resources.open_text(package=..., resource=...).
    In Python 3.13, open_text no longer accepts the `package=` keyword. This shim patches
    fotokenizer so it works on Python 3.13 (Hugging Face Spaces default)."""
    if not _HAS_FOTOKENIZER:
        return
    try:
        if "package" not in inspect.signature(importlib_resources.open_text).parameters:
            def _open_text_compat(*args, **kwargs):
                if "package" in kwargs:
                    pkg = kwargs.pop("package")
                    res = kwargs.pop("resource")
                    encoding = kwargs.pop("encoding", "utf-8")
                    errors = kwargs.pop("errors", "strict")
                    return importlib_resources.open_text(pkg, res, encoding=encoding, errors=errors)
                return importlib_resources.open_text(*args, **kwargs)

            fo_abbrev.open_text = _open_text_compat  # type: ignore[attr-defined]
    except Exception:
        pass


_patch_fotokenizer_for_py313()

# ----------------------------
# Config
# ----------------------------
MODEL_ID = "Setur/BRAGD"
TAGS_FILEPATH = "Sosialurin-BRAGD_tags.csv"
LABELS_FILEPATH = "tag_labels.json"
TARGET_MAX_TOKENS = 256  # We will cap this to the model's max if needed.

if not os.path.exists(LABELS_FILEPATH):
    raise RuntimeError(f"Missing {LABELS_FILEPATH}. Add it to the Space repo root.")

INTERVALS = (
    (15, 29), (30, 33), (34, 36), (37, 41), (42, 43), (44, 45), (46, 50),
    (51, 53), (54, 60), (61, 63), (64, 66), (67, 70), (71, 72),
)

GROUP_ORDER = [
    "subcategory", "gender", "number", "case", "article", "proper", "degree",
    "declension", "mood", "voice", "tense", "person", "definiteness",
]

HIDE_CODES = {"subcategory": {"B"}}  # Subcategory B to be removed

UI = {
    "fo": {"w": "Orð", "t": "Mark", "s": "Útgreining", "m": "Útgreinað marking"},
    "en": {"w": "Word", "t": "Tag", "s": "Analysis", "m": "Expanded tags"},
}

MODEL_LINK = "https://huggingface.co/Setur/BRAGD"

# ----------------------------
# Minimal CSS: ONLY the buttons (and a tiny header layout helper)
# ----------------------------
CSS = """
/* Keep Gradio default styling; only override our buttons. */
#btn_tag, #lang_fo_on, #lang_en_on{
  background:#89AFA9 !important; border-color:#6F9992 !important; color:#0b1b19 !important;
}
#btn_tag:hover, #lang_fo_on:hover, #lang_en_on:hover{
  background:#6F9992 !important; border-color:#6F9992 !important; color:#0b1b19 !important;
}
#lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
  background:#C6DAD6 !important; border-color:#6F9992 !important; color:#0b1b19 !important;
}
#lang_fo_off:hover, #lang_en_off:hover, #btn_dl_main:hover, #btn_dl_exp:hover{
  background:#89AFA9 !important; border-color:#6F9992 !important; color:#0b1b19 !important;
}
@media (prefers-color-scheme: dark){
  #lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
    background:#2a3b38 !important; border-color:#6F9992 !important; color:#e7eceb !important;
  }
  #lang_fo_off:hover, #lang_en_off:hover, #btn_dl_main:hover, #btn_dl_exp:hover{
    background:#89AFA9 !important; border-color:#6F9992 !important; color:#0b1b19 !important;
  }
}
#results_hdr{ display:flex !important; align-items:center !important; gap:12px !important; }
#results_hdr > .gr-markdown{ flex:1 1 auto !important; }
#lang_buttons{
  display:flex !important; gap:10px !important; justify-content:flex-end !important;
  align-items:center !important; flex-wrap:nowrap !important;
}
#lang_buttons .gr-button, #lang_buttons button{
  width:auto !important; min-width:120px !important; flex:0 0 auto !important;
}
#expanded_hdr{ display:flex !important; align-items:center !important; gap:12px !important; }
#expanded_hdr > .gr-markdown{ flex:1 1 auto !important; }
#expanded_buttons{
  display:flex !important; gap:10px !important; justify-content:flex-end !important;
  align-items:center !important; flex-wrap:nowrap !important;
}
#expanded_buttons .gr-button, #expanded_buttons button{
  width:auto !important; min-width:120px !important; flex:0 0 auto !important;
}
#input_col, #input_col > div, #input_col .gr-block, #input_col .gr-box,
#input_col .gr-panel, #input_col .gr-group, #input_col .gr-form{
  background: transparent !important; border: 0 !important; box-shadow: none !important;
}
#btn_tag{ align-self:flex-start !important; flex:0 0 auto !important; height:fit-content !important; }
#btn_tag button{ height:auto !important; }
#out_df .df-scroll, #out_mean_df .df-scroll{ overflow-x:auto !important; width:100% !important; }
#out_df table.df-table, #out_mean_df table.df-table{
  border-collapse:collapse !important; width:max-content !important; min-width:100% !important;
}
#out_df th, #out_df td, #out_mean_df th, #out_mean_df td{
  white-space:nowrap !important; padding:10px 12px !important;
  border:1px solid rgba(0,0,0,0.12) !important; text-align:left !important; vertical-align:top !important;
}
#out_df thead th, #out_mean_df thead th{
  font-weight:600 !important; background: rgba(0,0,0,0.03) !important;
}
@media (prefers-color-scheme: dark){
  #out_df th, #out_df td, #out_mean_df th, #out_mean_df td{
    border:1px solid rgba(255,255,255,0.14) !important;
  }
  #out_df thead th, #out_mean_df thead th{
    background: rgba(255,255,255,0.06) !important;
  }
}
"""
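# The #btn_tag / #lang_* / #out_df selectors above hook into the `elem_id`
# values given to the Gradio components created in the Blocks layout below;
# Gradio emits `elem_id` as the DOM id of the component, which is what lets
# this stylesheet restyle individual buttons and tables without a custom theme.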
# ----------------------------
# Tokenization
# ----------------------------
def simp_tok(sentence: str):
    return re.findall(r"\w+|[" + re.escape(string.punctuation) + "]", sentence)


# ----------------------------
# Sentence splitting
# ----------------------------
def split_sentences(text: str):
    """Split input into sentences.

    We use FO-Tokenizer sentence markers (BEGIN_SENT / END_SENT) when possible.
    Important detail: some FO-Tokenizer builds emit *whitespace* as
    "descriptor-only" tokens (empty `.txt`). If we simply join `.txt` pieces we
    can lose spaces and end up with merged words (e.g. `Núriggarkanska`).
    This function therefore:
      - preserves `.txt` pieces as-is
      - converts descriptor-only whitespace-like tokens into a single space
      - adds a best-effort inserted space between tokens in cases where
        whitespace is missing but clearly intended (word→word,
        comma/semicolon/colon→word)
    """
    s = (text or "")
    if not s.strip():
        return []

    def _norm(piece: str) -> str:
        return re.sub(r"[\r\n]+", " ", piece)

    def _append_piece(buf: list[str], piece: str) -> None:
        if not piece:
            return
        piece = _norm(piece)
        if not buf:
            buf.append(piece)
            return
        last = buf[-1]
        last_char = last[-1] if last else ""
        if last_char.isspace():
            buf.append(piece)
            return
        if piece[0].isalnum() and (last_char.isalnum() or last_char in {",", ";", ":"}):
            buf.append(" ")
        buf.append(piece)

    if _HAS_FOTOKENIZER:
        try:
            toks = fo_tokenize(s)
            sents: list[str] = []
            cur: list[str] = []
            for tok in toks:
                if getattr(tok, "txt", None):
                    _append_piece(cur, tok.txt)
                    continue
                descr = FO_TOK.descr.get(tok.kind, "").replace(" ", "_")
                if descr == "BEGIN_SENT":
                    if cur:
                        sent = "".join(cur).strip()
                        if sent:
                            sents.append(sent)
                        cur = []
                    continue
                if descr == "END_SENT":
                    sent = "".join(cur).strip()
                    if sent:
                        sents.append(sent)
                    cur = []
                    continue
                up = descr.upper()
                if "WHITESPACE" in up or "SPACE" in up or "TAB" in up:
                    _append_piece(cur, " ")
                elif "NEWLINE" in up or ("LINE" in up and "BREAK" in up):
                    _append_piece(cur, " ")
                elif up == "DASH":
                    _append_piece(cur, "-")
                else:
                    pass
            if cur:
                sent = "".join(cur).strip()
                if sent:
                    sents.append(sent)
            return sents or [s.strip()]
        except Exception:
            pass

    parts = re.split(r"(?<=[.!?])\s+", s.strip())
    return [p.strip() for p in parts if p.strip()]
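# Illustrative behaviour (an assumed example, not a doctest that is run):
#   split_sentences("Hetta er ein setningur. Og ein annar!")
#   -> ["Hetta er ein setningur.", "Og ein annar!"]
# The regex fallback alone already produces this split (it breaks after
# ./!/? followed by whitespace); the FO-Tokenizer path is preferred because
# its abbreviation tables avoid splitting after tokens a bare regex would
# mistake for sentence ends.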
def run_model_multisentence(text: str):
    """Run the model sentence-by-sentence and concatenate the rows."""
    rows_all = []
    for sent in split_sentences(text):
        rows_all.extend(run_model(sent))
    return rows_all


# ----------------------------
# CSV mapping
# ----------------------------
def load_tag_mappings(path: str):
    df = pd.read_csv(path)
    feature_cols = list(df.columns[1:])
    tag_to_features = {
        row["Original Tag"]: row[1:].values.astype(int) for _, row in df.iterrows()
    }
    features_to_tag = {
        tuple(row[1:].values.astype(int)): row["Original Tag"] for _, row in df.iterrows()
    }
    return tag_to_features, features_to_tag, len(feature_cols), feature_cols


def group_from_col(col: str):
    if col == "Article":
        return ("article", "A")
    if col.startswith("No-Article "):
        return ("article", col.split()[-1])
    if col == "Proper Noun":
        return ("proper", "P")
    if col.startswith("Not-Proper-Noun "):
        return ("proper", col.split()[-1])
    prefixes = [
        ("Word Class ", "word_class"),
        ("Subcategory ", "subcategory"),
        ("No-Subcategory ", "subcategory"),
        ("Gender ", "gender"),
        ("No-Gender ", "gender"),
        ("Number ", "number"),
        ("No-Number ", "number"),
        ("Case ", "case"),
        ("No-Case ", "case"),
        ("Degree ", "degree"),
        ("No-Degree ", "degree"),
        ("Declension ", "declension"),
        ("No-Declension ", "declension"),
        ("Mood ", "mood"),
        ("Voice ", "voice"),
        ("No-Voice ", "voice"),
        ("Tense ", "tense"),
        ("No-Tense ", "tense"),
        ("Person ", "person"),
        ("No-Person ", "person"),
        ("Definite ", "definiteness"),
        ("Indefinite ", "definiteness"),
    ]
    for p, g in prefixes:
        if col.startswith(p):
            return (g, col.split()[-1])
    return (None, None)


def process_tag_features(tag_to_features: dict, intervals):
    arrs = [np.array(tpl) for tpl in set(tuple(a) for a in tag_to_features.values())]
    wt_masks = {wt: [a for a in arrs if a[wt] == 1] for wt in range(15)}
    out = {}
    for wt, labels in wt_masks.items():
        if not labels:
            out[wt] = []
            continue
        sum_labels = np.sum(np.array(labels), axis=0)
        out[wt] = [iv for iv in intervals if np.sum(sum_labels[iv[0]:iv[1] + 1]) != 0]
    return out
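# Layout assumed by the feature vectors (inferred from INTERVALS and the CSV):
# positions 0-14 one-hot encode the word class; each (start, end) pair in
# INTERVALS is an inclusive index range holding the one-hot choices for one
# morphological group (gender, number, case, ...). process_tag_features
# records, per word class, which intervals ever carry a 1 in the tag CSV, so
# predict_vectors below only decodes the groups that word class can take.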
def predict_vectors(logits, attention_mask, begin_tokens, dict_intervals, vec_len):
    softmax = torch.nn.Softmax(dim=0)
    vectors = []
    for idx in range(len(logits)):
        if attention_mask[idx].item() != 1 or begin_tokens[idx] != 1:
            continue
        pred = logits[idx]
        vec = torch.zeros(vec_len, device=logits.device)
        wt = torch.argmax(softmax(pred[0:15])).item()
        vec[wt] = 1
        for (a, b) in dict_intervals.get(wt, []):
            seg = pred[a:b + 1]
            k = torch.argmax(softmax(seg)).item()
            vec[a + k] = 1
        vectors.append(vec)
    return vectors


# ----------------------------
# Load labels
# ----------------------------
with open(LABELS_FILEPATH, "r", encoding="utf-8") as f:
    LABELS = json.load(f)


def label_for(lang: str, group: str, wc: str, code: str) -> str:
    lang = "fo" if lang == "fo" else "en"
    by_wc = LABELS.get(lang, {}).get("by_word_class", {})
    glob = LABELS.get(lang, {}).get("global", {})
    if wc and wc in by_wc and code in by_wc[wc].get(group, {}):
        return by_wc[wc][group][code]
    return glob.get(group, {}).get(code, "")


def clean_label(s: str) -> str:
    s = (s or "").strip()
    s = re.sub(r"\s+", " ", s)
    return s.strip(" -;,:").strip()


# ----------------------------
# Load model + mapping
# ----------------------------
tag_to_features, features_to_tag, VEC_LEN, FEATURE_COLS = load_tag_mappings(TAGS_FILEPATH)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

MAX_TOKENS = int(TARGET_MAX_TOKENS)
_model_max = getattr(getattr(model, "config", None), "max_position_embeddings", None)
_tok_max = getattr(tokenizer, "model_max_length", None)
for _m in (_model_max, _tok_max):
    if isinstance(_m, int) and 0 < _m < 100000:
        MAX_TOKENS = min(MAX_TOKENS, _m)

if hasattr(model, "config") and hasattr(model.config, "num_labels") and model.config.num_labels != VEC_LEN:
    raise RuntimeError(
        f"Label size mismatch: model={model.config.num_labels}, csv={VEC_LEN}. Wrong CSV?"
    )
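# Note: with padding="max_length" and truncation=True in run_model below,
# a sentence producing more than MAX_TOKENS subwords is cut off, so its tail
# words receive no tags; MAX_TOKENS is clamped above to whichever of the
# model/tokenizer limits is smaller.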
Wrong CSV?") DICT_INTERVALS = process_tag_features(tag_to_features, INTERVALS) GROUPS = defaultdict(list) for i, col in enumerate(FEATURE_COLS): g, code = group_from_col(col) if g and code not in HIDE_CODES.get(g, set()): GROUPS[g].append((i, code, col)) def vector_to_tag(vec: torch.Tensor) -> str: return features_to_tag.get(tuple(vec.int().tolist()), "Unknown Tag") def wc_code(vec: torch.Tensor) -> str: for idx, code, _ in GROUPS["word_class"]: if int(vec[idx].item()) == 1: return code return "" def group_code(vec: torch.Tensor, group: str) -> str: hidden = HIDE_CODES.get(group, set()) for idx, code, _ in GROUPS.get(group, []): if code in hidden: continue if int(vec[idx].item()) == 1: return code return "" HIDE_IN_ANALYSIS = {("D", "subcategory", "G"), ("D", "subcategory", "N")} VOICE_ANALYSIS = { "fo": {"A": "gerðsøgn", "M": "miðalsøgn", "v": "orð luttøkuháttur"}, "en": {"A": "active voice", "M": "middle voice", "v": "supine form"}, } def analysis_text(vec: torch.Tensor, lang: str) -> str: lang = "fo" if lang == "fo" else "en" tag = vector_to_tag(vec) wc = wc_code(vec) mood_code = group_code(vec, "mood") if wc == "V" else "" skip_empty_verb_feats = (wc == "V" and mood_code in {"I", "M"}) if tag == "DGd": return "fyriseting" if lang == "fo" else "preposition" mood = group_code(vec, "mood") if mood == "U": sup = label_for(lang, "mood", wc, "U") or ("luttøkuháttur" if lang == "fo" else "supine") vcode = group_code(vec, "voice") or "v" vlabel = VOICE_ANALYSIS[lang].get(vcode, VOICE_ANALYSIS[lang]["v"]) return f"{clean_label(sup)}, {clean_label(vlabel)}" parts = [] if wc in {"P", "C"}: subc = group_code(vec, "subcategory") subl = clean_label(label_for(lang, "subcategory", wc, subc) or "") if subl: parts.append(subl) else: wcl = clean_label(label_for(lang, "word_class", wc, wc) or wc) if wcl: parts.append(wcl) for g in GROUP_ORDER: c = group_code(vec, g) if not c: continue if skip_empty_verb_feats and g in {"number", "tense", "person"} and c in {"n", "t", "p"}: continue if wc in {"P", "C"} and g == "subcategory": continue if (wc, g, c) in HIDE_IN_ANALYSIS: continue lbl = clean_label(label_for(lang, g, wc, c) or label_for(lang, g, "", c) or "") if lbl and lbl not in parts: parts.append(lbl) return ", ".join(parts) def expanded_text(vec: torch.Tensor, lang: str) -> str: lang = "fo" if lang == "fo" else "en" wc = wc_code(vec) parts = [] wc_lbl = label_for(lang, "word_class", wc, wc) parts.append(f"{wc} – {wc_lbl}" if wc_lbl else wc) for g in GROUP_ORDER: c = group_code(vec, g) if not c: continue lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c) parts.append(f"{c} – {lbl}" if lbl else c) return "; ".join([p for p in parts if p]) def compute_codes_by_wc(): codes = defaultdict(lambda: defaultdict(set)) for arr in tag_to_features.values(): arr = np.array(arr) wc = None for idx, code, _ in GROUPS["word_class"]: if arr[idx] == 1: wc = code break if not wc: continue for g in GROUP_ORDER: hidden = HIDE_CODES.get(g, set()) for idx, code, _ in GROUPS.get(g, []): if code in hidden: continue if arr[idx] == 1: codes[wc][g].add(code) return codes CODES_BY_WC = compute_codes_by_wc() def build_overview(lang: str) -> str: lang = "fo" if lang == "fo" else "en" title = "### Markayvirlit" if lang == "fo" else "### Tag Overview" lines = [title, ""] for wc in sorted(CODES_BY_WC.keys()): wcl = label_for(lang, "word_class", wc, wc) or "" lines.append(f"#### {wc} — {wcl}" if wcl else f"#### {wc}") for g in GROUP_ORDER: cs = sorted(CODES_BY_WC[wc].get(g, set())) if not cs: continue group_name = { "fo": { 
"subcategory": "Undirflokkur", "gender": "Kyn", "number": "Tal", "case": "Fall", "article": "Bundni/óbundni", "proper": "Sernavn / felagsnavn", "degree": "Stig", "declension": "Bending", "mood": "Háttur", "voice": "Søgn", "tense": "Tíð", "person": "Persónur", "definiteness": "Bundni/óbundni" }, "en": { "subcategory": "Subcategory", "gender": "Gender", "number": "Number", "case": "Case", "article": "Definiteness", "proper": "Proper/common noun", "degree": "Degree", "declension": "Declension", "mood": "Mood", "voice": "Voice", "tense": "Tense", "person": "Person", "definiteness": "Definiteness" }, }[lang].get(g, g) lines.append(f"**{group_name}**") for c in cs: lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c) lines.append(f"- `{c}` — {lbl}" if lbl else f"- `{c}`") lines.append("") lines.append("") return "\n".join(lines).strip() def run_model(sentence: str): s = (sentence or "").strip() if not s: return [] tokens = simp_tok(s) if not tokens: return [] enc = tokenizer( tokens, is_split_into_words=True, add_special_tokens=True, max_length=MAX_TOKENS, padding="max_length", truncation=True, return_attention_mask=True, return_tensors="pt", ) input_ids = enc["input_ids"].to(device) attention_mask = enc["attention_mask"].to(device) word_ids = enc.word_ids(batch_index=0) begin, last = [], None for wid in word_ids: if wid is None: begin.append(0) elif wid != last: begin.append(1) else: begin.append(0) last = wid with torch.no_grad(): logits = model(input_ids=input_ids, attention_mask=attention_mask).logits[0] vectors = predict_vectors(logits, attention_mask[0], begin, DICT_INTERVALS, VEC_LEN) rows, vec_i, seen = [], 0, set() for i, wid in enumerate(word_ids): if wid is None or begin[i] != 1 or wid in seen: continue seen.add(wid) word = tokens[wid] if wid < len(tokens) else "" vec = vectors[vec_i] if vec_i < len(vectors) else torch.zeros(VEC_LEN, device=device) rows.append({"word": word, "vec": vec.int().tolist()}) vec_i += 1 return rows def _make_html_table(headers, rows): th = "".join(f"{html.escape(str(h))}" for h in headers) body_rows = [] for row in rows: tds = "".join(f"{html.escape(str(c))}" for c in row) body_rows.append(f"{tds}") body = "".join(body_rows) return ( '
' f'{th}{body}
' '
' ) def render(rows_state, lang: str): lang = "fo" if lang == "fo" else "en" cols_main = [UI[lang]["w"], UI[lang]["t"], UI[lang]["s"]] cols_mean = [UI[lang]["w"], UI[lang]["t"], UI[lang]["m"]] if not rows_state: return (_make_html_table(cols_main, []), _make_html_table(cols_mean, []), build_overview(lang)) out_main, out_mean = [], [] for r in rows_state: vec = torch.tensor(r["vec"]) tag = vector_to_tag(vec) out_main.append([r["word"], tag, analysis_text(vec, lang)]) out_mean.append([r["word"], tag, expanded_text(vec, lang)]) return (_make_html_table(cols_main, out_main), _make_html_table(cols_mean, out_mean), build_overview(lang)) def _write_tsv(df: pd.DataFrame, filename: str) -> str: tmpdir = os.path.join(tempfile.gettempdir(), "marka_downloads", str(uuid.uuid4())) os.makedirs(tmpdir, exist_ok=True) path = os.path.join(tmpdir, filename) df.to_csv(path, sep="\t", index=False, encoding="utf-8") return path def build_download_main(rows_state) -> str: words, tags, fo_vals, en_vals = [], [], [], [] for r in (rows_state or []): vec = torch.tensor(r["vec"]) tag = vector_to_tag(vec) words.append(r["word"]) tags.append(tag) fo_vals.append(analysis_text(vec, "fo")) en_vals.append(analysis_text(vec, "en")) df = pd.DataFrame({ UI["fo"]["w"]: words, UI["fo"]["t"]: tags, UI["fo"]["s"]: fo_vals, UI["en"]["s"]: en_vals, }) return _write_tsv(df, "Markað.tsv") def build_download_expanded(rows_state, lang: str) -> str: lang = "fo" if lang == "fo" else "en" words, tags, vals = [], [], [] for r in (rows_state or []): vec = torch.tensor(r["vec"]) tag = vector_to_tag(vec) words.append(r["word"]) tags.append(tag) vals.append(expanded_text(vec, lang)) df = pd.DataFrame({ UI[lang]["w"]: words, UI[lang]["t"]: tags, UI[lang]["m"]: vals, }) return _write_tsv(df, "Markað_útgreinað.tsv") with gr.Blocks(css=CSS, title="Marka") as demo: with gr.Row(equal_height=False): with gr.Column(scale=2, elem_id="input_col"): inp = gr.Textbox( lines=6, placeholder="Skriva her ... 
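# Each download is written to a fresh uuid-named folder under the system temp
# directory and never deleted here; that is fine on ephemeral hosts such as
# Hugging Face Spaces, but a long-lived deployment would likely want a
# periodic sweep of tempfile.gettempdir()/marka_downloads.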
with gr.Blocks(css=CSS, title="Marka") as demo:
    with gr.Row(equal_height=False):
        with gr.Column(scale=2, elem_id="input_col"):
            inp = gr.Textbox(
                lines=6,
                placeholder="Skriva her ... / Type here ...",
                show_label=False,
                elem_id="input_box",
            )
        with gr.Column(scale=1, min_width=320):
            gr.Markdown(
                "## Marka\n"
                "Skriva ein setning í kassan og fá hann markaðan.\n\n"
                f"Myndil / Model: [{MODEL_ID}]({MODEL_LINK})"
            )
            btn = gr.Button("Marka / Tag", variant="primary", elem_id="btn_tag")

    state = gr.State([])
    lang_state = gr.State("fo")

    results_hdr = gr.Row(elem_id="results_hdr", visible=True)
    with results_hdr:
        results_title = gr.Markdown("### Úrslit / Results")
        with gr.Row(elem_id="lang_buttons") as lang_buttons_row:
            btn_lang_fo_on = gr.Button("Føroyskt", variant="primary", elem_id="lang_fo_on", visible=False)
            btn_lang_fo_off = gr.Button("Føroyskt", variant="secondary", elem_id="lang_fo_off", visible=False)
            btn_lang_en_on = gr.Button("English", variant="primary", elem_id="lang_en_on", visible=False)
            btn_lang_en_off = gr.Button("English", variant="secondary", elem_id="lang_en_off", visible=False)
            btn_dl_main = gr.DownloadButton("Tak niður / Download", variant="secondary", elem_id="btn_dl_main", visible=False)

    out_df = gr.HTML(value="", elem_id="out_df", visible=False)

    expanded_acc = gr.Accordion("Útgreinað marking / Expanded tags", open=False, visible=False)
    with expanded_acc:
        with gr.Row(elem_id="expanded_hdr"):
            gr.Markdown(" ")
            with gr.Row(elem_id="expanded_buttons"):
                btn_dl_exp = gr.DownloadButton("Tak niður / Download", variant="secondary", elem_id="btn_dl_exp", visible=False)
        out_mean_df = gr.HTML(value="", elem_id="out_mean_df")

    overview_acc = gr.Accordion("Markayvirlit / Tag Overview", open=False, visible=True)
    with overview_acc:
        overview_md = gr.Markdown(build_overview("fo"))

    def show_loading(lang_current):
        lang_current = "fo" if lang_current == "fo" else "en"
        cols_main = [UI[lang_current]["w"], UI[lang_current]["t"], UI[lang_current]["s"]]
        shell = _make_html_table(cols_main, [])
        return (
            gr.update(value=shell, visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=""),
            gr.update(value="Markar... / Tagging...", interactive=False),
        )

    def on_tag(text, lang_current):
        rows = run_model_multisentence(text)
        df_main, df_mean, overview = render(rows, lang_current)
        show_fo = (lang_current == "fo")
        show_en = (lang_current == "en")
        have_rows = bool(rows)
        dl_main_path = build_download_main(rows) if have_rows else None
        dl_exp_path = build_download_expanded(rows, lang_current) if have_rows else None
        return (
            rows,
            gr.update(value=df_main, visible=True),
            gr.update(value=df_mean),
            gr.update(value=overview),
            gr.update(visible=True),
            gr.update(visible=show_fo),
            gr.update(visible=not show_fo),
            gr.update(visible=show_en),
            gr.update(visible=not show_en),
            gr.update(value=dl_main_path, visible=have_rows),
            gr.update(value=dl_exp_path, visible=have_rows),
            lang_current,
            gr.update(value="Marka / Tag", interactive=True),
        )

    def on_set_lang(rows, lang_value):
        df_main, df_mean, overview = render(rows, lang_value)
        show_fo = (lang_value == "fo")
        show_en = (lang_value == "en")
        have_rows = bool(rows)
        dl_main_path = build_download_main(rows) if have_rows else None
        dl_exp_path = build_download_expanded(rows, lang_value) if have_rows else None
        return (
            lang_value,
            gr.update(value=df_main),
            gr.update(value=df_mean),
            gr.update(value=overview),
            gr.update(visible=show_fo),
            gr.update(visible=not show_fo),
            gr.update(visible=show_en),
            gr.update(visible=not show_en),
            gr.update(value=dl_main_path, visible=have_rows),
            gr.update(value=dl_exp_path, visible=have_rows),
        )

    def on_set_fo(rows):
        return on_set_lang(rows, "fo")

    def on_set_en(rows):
        return on_set_lang(rows, "en")

    _evt = btn.click(
        show_loading,
        inputs=[lang_state],
        outputs=[out_df, btn_dl_main, btn_dl_exp, expanded_acc, out_mean_df, btn],
        queue=False,
    )
    _evt.then(
        on_tag,
        inputs=[inp, lang_state],
        outputs=[
            state, out_df, out_mean_df, overview_md, expanded_acc,
            btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
            btn_dl_main, btn_dl_exp, lang_state, btn,
        ],
        queue=False,
    )
    btn_lang_fo_on.click(
        on_set_fo,
        inputs=[state],
        outputs=[
            lang_state, out_df, out_mean_df, overview_md,
            btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
            btn_dl_main, btn_dl_exp,
        ],
        queue=False,
    )
    btn_lang_fo_off.click(
        on_set_fo,
        inputs=[state],
        outputs=[
            lang_state, out_df, out_mean_df, overview_md,
            btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
            btn_dl_main, btn_dl_exp,
        ],
        queue=False,
    )
    btn_lang_en_on.click(
        on_set_en,
        inputs=[state],
        outputs=[
            lang_state, out_df, out_mean_df, overview_md,
            btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
            btn_dl_main, btn_dl_exp,
        ],
        queue=False,
    )
    btn_lang_en_off.click(
        on_set_en,
        inputs=[state],
        outputs=[
            lang_state, out_df, out_mean_df, overview_md,
            btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
            btn_dl_main, btn_dl_exp,
        ],
        queue=False,
    )

if __name__ == "__main__":
    demo.launch()