# Marka / app.py
# Hugging Face Space file view: uploaded by unijoh, commit "Update app.py" (fa38fed, verified).
import os, re, string, json, tempfile, uuid
import html
import inspect
import importlib.resources as importlib_resources
from collections import defaultdict
import gradio as gr
import torch
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModelForTokenClassification
# ----------------------------
# Optional: FO-Tokenizer (fotokenizer) for sentence splitting
# ----------------------------
# Probe for the optional FO-Tokenizer package. The flag is True only when every
# name the sentence splitter needs was imported; any import-time failure turns
# the FO-Tokenizer path off and the rest of the file checks the flag first.
try:
    import fotokenizer  # noqa: F401
    from fotokenizer import tokenize as fo_tokenize
    from fotokenizer import TOK as FO_TOK
    import fotokenizer.abbrev as fo_abbrev
except Exception:
    _HAS_FOTOKENIZER = False
else:
    _HAS_FOTOKENIZER = True
def _patch_fotokenizer_for_py313() -> None:
    """Make FO-Tokenizer's resource loading work on Python 3.13.

    FO-Tokenizer calls ``importlib.resources.open_text(package=..., resource=...)``.
    In Python 3.13 ``open_text`` no longer accepts the ``package=`` keyword, so
    when the running interpreter lacks that parameter the ``open_text`` name
    inside ``fotokenizer.abbrev`` is replaced with a wrapper that forwards the
    keyword values positionally. Does nothing when FO-Tokenizer is unavailable.
    Patching is best effort: any failure is ignored.
    """
    if not _HAS_FOTOKENIZER:
        return

    def _open_text_compat(*args, **kwargs):
        # Calls without the legacy keyword are passed through untouched.
        if "package" not in kwargs:
            return importlib_resources.open_text(*args, **kwargs)
        pkg = kwargs.pop("package")
        res = kwargs.pop("resource")
        encoding = kwargs.pop("encoding", "utf-8")
        errors = kwargs.pop("errors", "strict")
        return importlib_resources.open_text(pkg, res, encoding=encoding, errors=errors)

    try:
        accepted = inspect.signature(importlib_resources.open_text).parameters
        if "package" not in accepted:
            fo_abbrev.open_text = _open_text_compat  # type: ignore[attr-defined]
    except Exception:
        pass


_patch_fotokenizer_for_py313()
# ----------------------------
# Config
# ----------------------------
# Hugging Face model id passed to the tokenizer/model `from_pretrained` calls.
MODEL_ID = "Setur/BRAGD"
# CSV read by `load_tag_mappings`: an "Original Tag" column followed by one
# column per feature.
TAGS_FILEPATH = "Sosialurin-BRAGD_tags.csv"
# JSON file loaded into `LABELS` and queried by `label_for`.
LABELS_FILEPATH = "tag_labels.json"
TARGET_MAX_TOKENS = 256 # We will cap this to the model's max if needed.
# Fail at import time with a clear message when the labels file is missing.
if not os.path.exists(LABELS_FILEPATH):
    raise RuntimeError(f"Missing {LABELS_FILEPATH}. Add it to the Space repo root.")
# Inclusive (start, end) index ranges into the feature vector; each range is
# decoded with its own argmax in `predict_vectors`. Indices 0-14 are handled
# separately there as the word-class segment.
INTERVALS = (
    (15, 29), (30, 33), (34, 36), (37, 41), (42, 43), (44, 45), (46, 50),
    (51, 53), (54, 60), (61, 63), (64, 66), (67, 70), (71, 72)
)
# Order in which feature groups are emitted by the analysis/expanded/overview
# text builders.
GROUP_ORDER = [
    "subcategory", "gender", "number", "case", "article", "proper",
    "degree", "declension", "mood", "voice", "tense", "person", "definiteness"
]
HIDE_CODES = {"subcategory": {"B"}} # Subcategory B to be removed
# Table column headers per UI language: w = word, t = tag, s = analysis,
# m = expanded tags.
UI = {
    "fo": {"w": "Orð", "t": "Mark", "s": "Útgreining", "m": "Útgreinað marking"},
    "en": {"w": "Word", "t": "Tag", "s": "Analysis", "m": "Expanded tags"},
}
# URL used for the model link shown in the page header.
MODEL_LINK = "https://huggingface.co/Setur/BRAGD"
# ----------------------------
# Minimal CSS: ONLY the buttons (and a tiny header layout helper)
# ----------------------------
# Stylesheet handed to `gr.Blocks(css=...)`. Selectors target the `elem_id`
# values assigned to components in the UI definition and the `df-scroll` /
# `df-table` classes emitted by `_make_html_table`.
CSS = """
/* Keep Gradio default styling; only override our buttons. */
#btn_tag, #lang_fo_on, #lang_en_on{
background:#89AFA9 !important;
border-color:#6F9992 !important;
color:#0b1b19 !important;
}
#btn_tag:hover, #lang_fo_on:hover, #lang_en_on:hover{
background:#6F9992 !important;
border-color:#6F9992 !important;
color:#0b1b19 !important;
}
#lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
background:#C6DAD6 !important;
border-color:#6F9992 !important;
color:#0b1b19 !important;
}
#lang_fo_off:hover, #lang_en_off:hover, #btn_dl_main:hover, #btn_dl_exp:hover{
background:#89AFA9 !important;
border-color:#6F9992 !important;
color:#0b1b19 !important;
}
@media (prefers-color-scheme: dark){
#lang_fo_off, #lang_en_off, #btn_dl_main, #btn_dl_exp{
background:#2a3b38 !important;
border-color:#6F9992 !important;
color:#e7eceb !important;
}
#lang_fo_off:hover, #lang_en_off:hover, #btn_dl_main:hover, #btn_dl_exp:hover{
background:#89AFA9 !important;
border-color:#6F9992 !important;
color:#0b1b19 !important;
}
}
#results_hdr{
display:flex !important;
align-items:center !important;
gap:12px !important;
}
#results_hdr > .gr-markdown{ flex:1 1 auto !important; }
#lang_buttons{
display:flex !important;
gap:10px !important;
justify-content:flex-end !important;
align-items:center !important;
flex-wrap:nowrap !important;
}
#lang_buttons .gr-button, #lang_buttons button{
width:auto !important;
min-width:120px !important;
flex:0 0 auto !important;
}
#expanded_hdr{
display:flex !important;
align-items:center !important;
gap:12px !important;
}
#expanded_hdr > .gr-markdown{ flex:1 1 auto !important; }
#expanded_buttons{
display:flex !important;
gap:10px !important;
justify-content:flex-end !important;
align-items:center !important;
flex-wrap:nowrap !important;
}
#expanded_buttons .gr-button, #expanded_buttons button{
width:auto !important;
min-width:120px !important;
flex:0 0 auto !important;
}
#input_col,
#input_col > div,
#input_col .gr-block,
#input_col .gr-box,
#input_col .gr-panel,
#input_col .gr-group,
#input_col .gr-form{
background: transparent !important;
border: 0 !important;
box-shadow: none !important;
}
#btn_tag{
align-self:flex-start !important;
flex:0 0 auto !important;
height:fit-content !important;
}
#btn_tag button{
height:auto !important;
}
#out_df .df-scroll, #out_mean_df .df-scroll{
overflow-x:auto !important;
width:100% !important;
}
#out_df table.df-table, #out_mean_df table.df-table{
border-collapse:collapse !important;
width:max-content !important;
min-width:100% !important;
}
#out_df th, #out_df td,
#out_mean_df th, #out_mean_df td{
white-space:nowrap !important;
padding:10px 12px !important;
border:1px solid rgba(0,0,0,0.12) !important;
text-align:left !important;
vertical-align:top !important;
}
#out_df thead th, #out_mean_df thead th{
font-weight:600 !important;
background: rgba(0,0,0,0.03) !important;
}
@media (prefers-color-scheme: dark){
#out_df th, #out_df td,
#out_mean_df th, #out_mean_df td{
border:1px solid rgba(255,255,255,0.14) !important;
}
#out_df thead th, #out_mean_df thead th{
background: rgba(255,255,255,0.06) !important;
}
}
"""
# ----------------------------
# Tokenization
# ----------------------------
def simp_tok(sentence: str):
    """Split *sentence* into word and punctuation tokens.

    A token is either a maximal run of word characters (``\\w+``) or a single
    character that is neither a word character nor whitespace. Whitespace is
    discarded.

    The second alternative deliberately covers all punctuation and symbols,
    not just ASCII ``string.punctuation``: characters such as „ “ – … « »
    are emitted as their own tokens instead of being dropped from the input.
    For text made only of word characters, whitespace and ASCII punctuation
    the result is the same as with an ASCII-only punctuation class.

    Returns:
        A list of token strings in input order (empty for an empty string).
    """
    return re.findall(r"\w+|[^\w\s]", sentence)
# ----------------------------
# Sentence splitting
# ----------------------------
def split_sentences(text: str):
    """Break *text* into a list of sentence strings.

    When FO-Tokenizer is available its BEGIN_SENT / END_SENT markers delimit
    the sentences. Some FO-Tokenizer builds emit whitespace as descriptor-only
    tokens (empty ``.txt``), so naively joining ``.txt`` pieces can merge
    words (e.g. ``Núriggarkanska``). To avoid that this function:

    - keeps ``.txt`` pieces as they are (line breaks become spaces),
    - turns descriptor-only whitespace-like tokens into a single space,
    - inserts a best-effort space between pieces when one is clearly missing
      (word→word, or comma/semicolon/colon→word).

    If FO-Tokenizer is unavailable or raises, the text is split on whitespace
    that follows ``.``, ``!`` or ``?``.

    Returns:
        The non-empty, stripped sentences; ``[]`` for blank input.
    """
    raw = text or ""
    if not raw.strip():
        return []

    def _push(buf: list[str], piece: str) -> None:
        """Append *piece* to *buf*, restoring a lost separating space if needed."""
        if not piece:
            return
        piece = re.sub(r"[\r\n]+", " ", piece)
        if buf:
            tail = buf[-1][-1:]
            gap_lost = (
                not tail.isspace()
                and piece[0].isalnum()
                and (tail.isalnum() or tail in {",", ";", ":"})
            )
            if gap_lost:
                buf.append(" ")
        buf.append(piece)

    if _HAS_FOTOKENIZER:
        try:
            stream = fo_tokenize(raw)
            sentences: list[str] = []
            pending: list[str] = []

            def _flush() -> None:
                """Emit the pending pieces as one sentence and start a new one."""
                joined = "".join(pending).strip()
                if joined:
                    sentences.append(joined)
                pending.clear()

            for tok in stream:
                txt = getattr(tok, "txt", None)
                if txt:
                    _push(pending, txt)
                    continue
                # Descriptor-only token: decide by the token kind's description.
                descr = FO_TOK.descr.get(tok.kind, "").replace(" ", "_")
                if descr in ("BEGIN_SENT", "END_SENT"):
                    _flush()
                    continue
                upper = descr.upper()
                whitespace_like = (
                    any(mark in upper for mark in ("WHITESPACE", "SPACE", "TAB", "NEWLINE"))
                    or ("LINE" in upper and "BREAK" in upper)
                )
                if whitespace_like:
                    _push(pending, " ")
                elif upper == "DASH":
                    _push(pending, "-")
            _flush()
            return sentences or [raw.strip()]
        except Exception:
            # Best effort: fall through to the regex splitter below.
            pass

    chunks = re.split(r"(?<=[.!?])\s+", raw.strip())
    return [chunk.strip() for chunk in chunks if chunk.strip()]
def run_model_multisentence(text: str):
    """Tag *text* one sentence at a time and return all rows in order.

    The text is split with ``split_sentences``; each sentence goes through
    ``run_model`` and the per-sentence row lists are concatenated.
    """
    return [
        row
        for sentence in split_sentences(text)
        for row in run_model(sentence)
    ]
# ----------------------------
# CSV mapping
# ----------------------------
def load_tag_mappings(path: str):
    """Read the tag CSV at *path* and build both lookup directions.

    The CSV must have an ``Original Tag`` column; every column after the first
    is treated as one integer feature.

    Returns:
        A 4-tuple ``(tag_to_features, features_to_tag, n_features, feature_cols)``:
        tag string → integer feature array, feature tuple → tag string, the
        number of feature columns, and their names.
    """
    table = pd.read_csv(path)
    feature_names = list(table.columns[1:])
    by_tag = {}
    by_features = {}
    for _, record in table.iterrows():
        tag = record["Original Tag"]
        by_tag[tag] = record[1:].values.astype(int)
        by_features[tuple(record[1:].values.astype(int))] = tag
    return by_tag, by_features, len(feature_names), feature_names
def group_from_col(col: str):
    """Map a CSV feature column name to ``(group, code)``.

    Two column names are matched exactly and carry a fixed code; all others
    are matched by prefix and use the last whitespace-separated word of the
    column name as the code. Unrecognised names give ``(None, None)``.
    """
    fixed = {
        "Article": ("article", "A"),
        "Proper Noun": ("proper", "P"),
    }
    if col in fixed:
        return fixed[col]

    prefix_groups = (
        ("No-Article ", "article"),
        ("Not-Proper-Noun ", "proper"),
        ("Word Class ", "word_class"),
        ("Subcategory ", "subcategory"), ("No-Subcategory ", "subcategory"),
        ("Gender ", "gender"), ("No-Gender ", "gender"),
        ("Number ", "number"), ("No-Number ", "number"),
        ("Case ", "case"), ("No-Case ", "case"),
        ("Degree ", "degree"), ("No-Degree ", "degree"),
        ("Declension ", "declension"), ("No-Declension ", "declension"),
        ("Mood ", "mood"),
        ("Voice ", "voice"), ("No-Voice ", "voice"),
        ("Tense ", "tense"), ("No-Tense ", "tense"),
        ("Person ", "person"), ("No-Person ", "person"),
        ("Definite ", "definiteness"), ("Indefinite ", "definiteness"),
    )
    group = next((name for prefix, name in prefix_groups if col.startswith(prefix)), None)
    if group is None:
        return (None, None)
    return (group, col.split()[-1])
def process_tag_features(tag_to_features: dict, intervals):
    """Find, per word class, which feature intervals are ever populated.

    For each word-class index 0-14, collects the distinct feature vectors
    with that index set and keeps the ``(start, end)`` intervals (inclusive)
    in which at least one of those vectors has a non-zero entry.

    Returns:
        Dict mapping each word-class index (0-14) to a list of intervals;
        the list is empty when no vector uses that word class.
    """
    distinct = {tuple(features) for features in tag_to_features.values()}
    vectors = [np.array(entry) for entry in distinct]

    usable = {}
    for wt in range(15):
        members = [vec for vec in vectors if vec[wt] == 1]
        if not members:
            usable[wt] = []
            continue
        totals = np.sum(np.array(members), axis=0)
        usable[wt] = [
            span for span in intervals
            if np.sum(totals[span[0]:span[1] + 1]) != 0
        ]
    return usable
def predict_vectors(logits, attention_mask, begin_tokens, dict_intervals, vec_len):
    """Decode per-position logits into one-hot feature vectors.

    Only positions whose attention mask is 1 and whose ``begin_tokens`` flag
    is 1 are decoded. For each such position the word class is the argmax of
    the first 15 scores; then, for every interval listed for that word class
    in *dict_intervals*, the argmax inside the (inclusive) interval is set.

    Returns:
        A list of 1-D tensors of length *vec_len*, one per decoded position.
    """
    softmax = torch.nn.Softmax(dim=0)
    decoded = []
    for pos, scores in enumerate(logits):
        keep = attention_mask[pos].item() == 1 and begin_tokens[pos] == 1
        if not keep:
            continue
        one_hot = torch.zeros(vec_len, device=logits.device)
        word_class = torch.argmax(softmax(scores[0:15])).item()
        one_hot[word_class] = 1
        for start, end in dict_intervals.get(word_class, []):
            offset = torch.argmax(softmax(scores[start:end + 1])).item()
            one_hot[start + offset] = 1
        decoded.append(one_hot)
    return decoded
# ----------------------------
# Load labels
# ----------------------------
# Parse the labels JSON once at import time; `label_for` reads the result.
with open(LABELS_FILEPATH, "r", encoding="utf-8") as f:
    LABELS = json.load(f)
def label_for(lang: str, group: str, wc: str, code: str) -> str:
    """Look up the display label for *code* of feature *group*.

    Any *lang* other than ``"fo"`` is treated as ``"en"``. A label defined
    for the word class *wc* under ``by_word_class`` wins; otherwise the
    ``global`` table is used. Returns ``""`` when no label exists.
    """
    table = LABELS.get("fo" if lang == "fo" else "en", {})
    per_word_class = table.get("by_word_class", {})
    if wc and wc in per_word_class:
        specific = per_word_class[wc].get(group, {})
        if code in specific:
            return specific[code]
    return table.get("global", {}).get(group, {}).get(code, "")
def clean_label(s: str) -> str:
    """Normalise a label: collapse whitespace runs and trim separators.

    ``None`` is treated as an empty string. After collapsing whitespace,
    leading/trailing spaces, hyphens, semicolons, commas and colons are
    removed.
    """
    collapsed = re.sub(r"\s+", " ", (s or "").strip())
    return collapsed.strip(" -;,:").strip()
# ----------------------------
# Load model + mapping
# ----------------------------
# Tag <-> feature-vector lookups, the vector length and the feature column names.
tag_to_features, features_to_tag, VEC_LEN, FEATURE_COLS = load_tag_mappings(TAGS_FILEPATH)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
# Use the GPU when one is available; the model is only used for inference.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
# Start from the configured target and lower it to whatever limits the model
# config and the tokenizer report.
MAX_TOKENS = int(TARGET_MAX_TOKENS)
_model_max = getattr(getattr(model, "config", None), "max_position_embeddings", None)
_tok_max = getattr(tokenizer, "model_max_length", None)
for _m in (_model_max, _tok_max):
    # Only positive ints below 100000 are honoured — presumably to ignore
    # "no limit" sentinel values; TODO confirm.
    if isinstance(_m, int) and 0 < _m < 100000:
        MAX_TOKENS = min(MAX_TOKENS, _m)
# The model's label count must equal the number of CSV feature columns.
if hasattr(model, "config") and hasattr(model.config, "num_labels") and model.config.num_labels != VEC_LEN:
    raise RuntimeError(f"Label size mismatch: model={model.config.num_labels}, csv={VEC_LEN}. Wrong CSV?")
DICT_INTERVALS = process_tag_features(tag_to_features, INTERVALS)
# group name -> list of (feature index, code, column name), leaving out codes
# listed in HIDE_CODES for that group.
GROUPS = defaultdict(list)
for i, col in enumerate(FEATURE_COLS):
    g, code = group_from_col(col)
    if g and code not in HIDE_CODES.get(g, set()):
        GROUPS[g].append((i, code, col))
def vector_to_tag(vec: torch.Tensor) -> str:
    """Return the tag string for feature vector *vec*, or ``"Unknown Tag"``."""
    key = tuple(vec.int().tolist())
    return features_to_tag.get(key, "Unknown Tag")
def wc_code(vec: torch.Tensor) -> str:
    """Return the code of the first word-class feature set in *vec*, else ``""``."""
    return next(
        (code for idx, code, _ in GROUPS["word_class"] if int(vec[idx].item()) == 1),
        "",
    )
def group_code(vec: torch.Tensor, group: str) -> str:
    """Return the first non-hidden code of *group* that is set in *vec*.

    Codes listed for the group in ``HIDE_CODES`` are never returned. Gives
    ``""`` when the group is unknown or none of its features is set.
    """
    suppressed = HIDE_CODES.get(group, set())
    for idx, code, _ in GROUPS.get(group, []):
        if code not in suppressed and int(vec[idx].item()) == 1:
            return code
    return ""
# (word class, group, code) triples that `analysis_text` leaves out of its output.
HIDE_IN_ANALYSIS = {("D", "subcategory", "G"), ("D", "subcategory", "N")}
# Voice wording used by `analysis_text` when the mood code is "U"; the "v"
# entry is the fallback when no voice code is set or the code is not listed.
VOICE_ANALYSIS = {
    "fo": {"A": "gerðsøgn", "M": "miðalsøgn", "v": "orð luttøkuháttur"},
    "en": {"A": "active voice", "M": "middle voice", "v": "supine form"},
}
def analysis_text(vec: torch.Tensor, lang: str) -> str:
    """Build the short, comma-separated analysis shown in the main table.

    Special cases, in order:

    - tag ``DGd`` is rendered simply as "preposition";
    - mood code ``U`` is rendered as the supine label plus a voice label;
    - otherwise the text starts with the subcategory label (word classes
      ``P`` and ``C``) or the word-class label, followed by the labels of
      the remaining feature groups in ``GROUP_ORDER``.

    Any *lang* other than ``"fo"`` is treated as ``"en"``.
    """
    lang = "fo" if lang == "fo" else "en"

    if vector_to_tag(vec) == "DGd":
        return "fyriseting" if lang == "fo" else "preposition"

    wc = wc_code(vec)
    mood = group_code(vec, "mood")
    if mood == "U":
        sup = label_for(lang, "mood", wc, "U") or ("luttøkuháttur" if lang == "fo" else "supine")
        voice_labels = VOICE_ANALYSIS[lang]
        vlabel = voice_labels.get(group_code(vec, "voice") or "v", voice_labels["v"])
        return f"{clean_label(sup)}, {clean_label(vlabel)}"

    # For verbs in mood I or M the placeholder number/tense/person codes are omitted.
    drop_placeholders = wc == "V" and mood in {"I", "M"}
    subcategory_leads = wc in {"P", "C"}

    if subcategory_leads:
        lead = clean_label(label_for(lang, "subcategory", wc, group_code(vec, "subcategory")) or "")
    else:
        lead = clean_label(label_for(lang, "word_class", wc, wc) or wc)
    parts = [lead] if lead else []

    for g in GROUP_ORDER:
        if subcategory_leads and g == "subcategory":
            continue  # already used as the leading label
        c = group_code(vec, g)
        if not c:
            continue
        if drop_placeholders and g in {"number", "tense", "person"} and c in {"n", "t", "p"}:
            continue
        if (wc, g, c) in HIDE_IN_ANALYSIS:
            continue
        lbl = clean_label(label_for(lang, g, wc, c) or label_for(lang, g, "", c) or "")
        if lbl and lbl not in parts:
            parts.append(lbl)
    return ", ".join(parts)
def expanded_text(vec: torch.Tensor, lang: str) -> str:
    """Build the semicolon-separated "code + label" string for the expanded table.

    The word-class code comes first, then the code of every feature group in
    ``GROUP_ORDER`` that is set in *vec*; each code is immediately followed
    by its label when one exists. Any *lang* other than ``"fo"`` is treated
    as ``"en"``.
    """
    lang = "fo" if lang == "fo" else "en"
    wc = wc_code(vec)
    wc_lbl = label_for(lang, "word_class", wc, wc)
    parts = [f"{wc}{wc_lbl}" if wc_lbl else wc]
    for g in GROUP_ORDER:
        c = group_code(vec, g)
        if c:
            lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c)
            parts.append(f"{c}{lbl}" if lbl else c)
    return "; ".join(piece for piece in parts if piece)
def compute_codes_by_wc():
    """Collect, per word class, the codes each feature group takes in the tag set.

    Walks every feature vector in ``tag_to_features``; vectors without a
    word-class feature are ignored. Codes hidden via ``HIDE_CODES`` are not
    recorded.

    Returns:
        Nested mapping ``word class code -> group name -> set of codes``.
    """
    codes = defaultdict(lambda: defaultdict(set))
    for features in tag_to_features.values():
        bits = np.array(features)
        wc = next(
            (code for idx, code, _ in GROUPS["word_class"] if bits[idx] == 1),
            None,
        )
        if not wc:
            continue
        for g in GROUP_ORDER:
            suppressed = HIDE_CODES.get(g, set())
            for idx, code, _ in GROUPS.get(g, []):
                if code not in suppressed and bits[idx] == 1:
                    codes[wc][g].add(code)
    return codes


CODES_BY_WC = compute_codes_by_wc()
def build_overview(lang: str) -> str:
    """Render the tag overview as Markdown for the given UI language.

    One ``####`` section per word class in ``CODES_BY_WC`` (sorted), and
    inside it one bold heading per feature group with a bullet per code.
    Any *lang* other than ``"fo"`` is treated as ``"en"``.
    """
    # NOTE(review): the source lost its indentation; the blank line after each
    # group and after each word class is placed as the structure suggests — confirm.
    lang = "fo" if lang == "fo" else "en"
    headings = {
        "fo": {
            "subcategory": "Undirflokkur", "gender": "Kyn", "number": "Tal", "case": "Fall",
            "article": "Bundni/óbundni", "proper": "Sernavn / felagsnavn", "degree": "Stig",
            "declension": "Bending", "mood": "Háttur", "voice": "Søgn", "tense": "Tíð",
            "person": "Persónur", "definiteness": "Bundni/óbundni"
        },
        "en": {
            "subcategory": "Subcategory", "gender": "Gender", "number": "Number", "case": "Case",
            "article": "Definiteness", "proper": "Proper/common noun", "degree": "Degree",
            "declension": "Declension", "mood": "Mood", "voice": "Voice", "tense": "Tense",
            "person": "Person", "definiteness": "Definiteness"
        },
    }[lang]

    lines = ["### Markayvirlit" if lang == "fo" else "### Tag Overview", ""]
    for wc in sorted(CODES_BY_WC.keys()):
        wcl = label_for(lang, "word_class", wc, wc) or ""
        lines.append(f"#### {wc}{wcl}" if wcl else f"#### {wc}")
        for g in GROUP_ORDER:
            cs = sorted(CODES_BY_WC[wc].get(g, set()))
            if not cs:
                continue
            lines.append(f"**{headings.get(g, g)}**")
            for c in cs:
                lbl = label_for(lang, g, wc, c) or label_for(lang, g, "", c)
                lines.append(f"- `{c}` — {lbl}" if lbl else f"- `{c}`")
            lines.append("")
        lines.append("")
    return "\n".join(lines).strip()
def run_model(sentence: str):
    """Tag one sentence and return a row per word.

    The sentence is tokenised with ``simp_tok``, encoded as pre-split words
    and run through the model once. The logits at the first sub-token of
    each word are decoded into a feature vector by ``predict_vectors``.

    The encoding is truncated to ``MAX_TOKENS`` but not padded: there is only
    one sequence per call, so padding to the maximum length would only make
    the forward pass process positions that the attention mask hides anyway.
    Words that fall beyond the truncation point get no row.

    Returns:
        A list of ``{"word": str, "vec": list[int]}`` dicts in sentence
        order; empty for blank input or when no tokens are found.
    """
    s = (sentence or "").strip()
    if not s:
        return []
    tokens = simp_tok(s)
    if not tokens:
        return []

    enc = tokenizer(
        tokens,
        is_split_into_words=True,
        add_special_tokens=True,
        max_length=MAX_TOKENS,
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt",
    )
    input_ids = enc["input_ids"].to(device)
    attention_mask = enc["attention_mask"].to(device)
    word_ids = enc.word_ids(batch_index=0)

    # begin[i] == 1 marks the first sub-token of a word; special tokens
    # (word id None) and continuation sub-tokens get 0.
    begin = []
    previous = None
    for wid in word_ids:
        begin.append(1 if wid is not None and wid != previous else 0)
        previous = wid

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits[0]
    vectors = predict_vectors(logits, attention_mask[0], begin, DICT_INTERVALS, VEC_LEN)

    # predict_vectors yields one vector per begin position, in the same order.
    word_starts = [wid for wid, flag in zip(word_ids, begin) if flag == 1]
    rows = []
    for vec_i, wid in enumerate(word_starts):
        word = tokens[wid] if wid < len(tokens) else "<UNK>"
        # Defensive fallback: an all-zero vector if a prediction is missing.
        vec = vectors[vec_i] if vec_i < len(vectors) else torch.zeros(VEC_LEN, device=device)
        rows.append({"word": word, "vec": vec.int().tolist()})
    return rows
def _make_html_table(headers, rows):
th = "".join(f"<th>{html.escape(str(h))}</th>" for h in headers)
body_rows = []
for row in rows:
tds = "".join(f"<td>{html.escape(str(c))}</td>" for c in row)
body_rows.append(f"<tr>{tds}</tr>")
body = "".join(body_rows)
return (
'<div class="df-scroll">'
f'<table class="df-table"><thead><tr>{th}</tr></thead><tbody>{body}</tbody></table>'
'</div>'
)
def render(rows_state, lang: str):
    """Produce the three display artefacts for the current rows and language.

    Returns:
        ``(main_table_html, expanded_table_html, overview_markdown)``. With
        no rows both tables contain only their header row. Any *lang* other
        than ``"fo"`` is treated as ``"en"``.
    """
    lang = "fo" if lang == "fo" else "en"
    headers = UI[lang]
    cols_main = [headers["w"], headers["t"], headers["s"]]
    cols_mean = [headers["w"], headers["t"], headers["m"]]

    main_rows, expanded_rows = [], []
    for entry in rows_state or []:
        vec = torch.tensor(entry["vec"])
        tag = vector_to_tag(vec)
        main_rows.append([entry["word"], tag, analysis_text(vec, lang)])
        expanded_rows.append([entry["word"], tag, expanded_text(vec, lang)])

    return (
        _make_html_table(cols_main, main_rows),
        _make_html_table(cols_mean, expanded_rows),
        build_overview(lang),
    )
def _write_tsv(df: pd.DataFrame, filename: str) -> str:
tmpdir = os.path.join(tempfile.gettempdir(), "marka_downloads", str(uuid.uuid4()))
os.makedirs(tmpdir, exist_ok=True)
path = os.path.join(tmpdir, filename)
df.to_csv(path, sep="\t", index=False, encoding="utf-8")
return path
def build_download_main(rows_state) -> str:
    """Write the main results as a TSV file and return its path.

    Columns: word, tag, Faroese analysis, English analysis (word and tag
    headers use the Faroese UI strings). ``None`` is treated as no rows.
    """
    entries = [(r["word"], torch.tensor(r["vec"])) for r in (rows_state or [])]
    df = pd.DataFrame({
        UI["fo"]["w"]: [word for word, _ in entries],
        UI["fo"]["t"]: [vector_to_tag(vec) for _, vec in entries],
        UI["fo"]["s"]: [analysis_text(vec, "fo") for _, vec in entries],
        UI["en"]["s"]: [analysis_text(vec, "en") for _, vec in entries],
    })
    return _write_tsv(df, "Markað.tsv")
def build_download_expanded(rows_state, lang: str) -> str:
    """Write the expanded-tag results as a TSV file and return its path.

    Columns: word, tag, expanded tag text, all in the given UI language
    (anything other than ``"fo"`` is treated as ``"en"``). ``None`` is
    treated as no rows.
    """
    lang = "fo" if lang == "fo" else "en"
    headers = UI[lang]
    entries = [(r["word"], torch.tensor(r["vec"])) for r in (rows_state or [])]
    df = pd.DataFrame({
        headers["w"]: [word for word, _ in entries],
        headers["t"]: [vector_to_tag(vec) for _, vec in entries],
        headers["m"]: [expanded_text(vec, lang) for _, vec in entries],
    })
    return _write_tsv(df, "Markað_útgreinað.tsv")
# UI definition and event wiring.
# NOTE(review): the source lost its indentation, so the nesting of the layout
# containers below is reconstructed from the element ids and the CSS rules —
# confirm against the deployed layout (in particular where the tag button sits).
with gr.Blocks(css=CSS, title="Marka") as demo:
    # --- Input area: text box on the left, title + tag button on the right ---
    with gr.Row(equal_height=False):
        with gr.Column(scale=2, elem_id="input_col"):
            inp = gr.Textbox(
                lines=6,
                placeholder="Skriva her ... / Type here ...",
                show_label=False,
                elem_id="input_box",
            )
        with gr.Column(scale=1, min_width=320):
            gr.Markdown(
                "## Marka\n"
                "Skriv ein setning í kassan og fá hann markaðan.\n\n"
                f"Myndil / Model: [{MODEL_ID}]({MODEL_LINK})"
            )
            btn = gr.Button("Marka / Tag", variant="primary", elem_id="btn_tag")

    # Tagged rows and the active UI language are kept between events.
    state = gr.State([])
    lang_state = gr.State("fo")

    # --- Results header: title, language toggle and main download button ---
    results_hdr = gr.Row(elem_id="results_hdr", visible=True)
    with results_hdr:
        results_title = gr.Markdown("### Úrslit / Results")
        with gr.Row(elem_id="lang_buttons") as lang_buttons_row:
            # Each language has an "on" and an "off" styled button; only one
            # of the pair is visible at a time.
            btn_lang_fo_on = gr.Button("Føroyskt", variant="primary", elem_id="lang_fo_on", visible=False)
            btn_lang_fo_off = gr.Button("Føroyskt", variant="secondary", elem_id="lang_fo_off", visible=False)
            btn_lang_en_on = gr.Button("English", variant="primary", elem_id="lang_en_on", visible=False)
            btn_lang_en_off = gr.Button("English", variant="secondary", elem_id="lang_en_off", visible=False)
            btn_dl_main = gr.DownloadButton("Tak niður / Download", variant="secondary", elem_id="btn_dl_main", visible=False)
    out_df = gr.HTML(value="", elem_id="out_df", visible=False)

    # --- Expanded tags (collapsed by default, hidden until there are results) ---
    expanded_acc = gr.Accordion("Útgreinað marking / Expanded tags", open=False, visible=False)
    with expanded_acc:
        with gr.Row(elem_id="expanded_hdr"):
            gr.Markdown(" ")
            with gr.Row(elem_id="expanded_buttons"):
                btn_dl_exp = gr.DownloadButton("Tak niður / Download", variant="secondary", elem_id="btn_dl_exp", visible=False)
        out_mean_df = gr.HTML(value="", elem_id="out_mean_df")

    # --- Tag overview (always available) ---
    overview_acc = gr.Accordion("Markayvirlit / Tag Overview", open=False, visible=True)
    with overview_acc:
        overview_md = gr.Markdown(build_overview("fo"))

    def show_loading(lang_current):
        """Show an empty results table and disable the tag button while tagging runs."""
        lang_current = "fo" if lang_current == "fo" else "en"
        headers = UI[lang_current]
        shell = _make_html_table([headers["w"], headers["t"], headers["s"]], [])
        return (
            gr.update(value=shell, visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=""),
            gr.update(value="Markar... / Tagging...", interactive=False),
        )

    def _language_and_download_updates(rows, lang_value):
        """Return the four language-button updates followed by the two download updates."""
        show_fo = (lang_value == "fo")
        show_en = (lang_value == "en")
        have_rows = bool(rows)
        dl_main_path = build_download_main(rows) if have_rows else None
        dl_exp_path = build_download_expanded(rows, lang_value) if have_rows else None
        return (
            gr.update(visible=show_fo),
            gr.update(visible=not show_fo),
            gr.update(visible=show_en),
            gr.update(visible=not show_en),
            gr.update(value=dl_main_path, visible=have_rows),
            gr.update(value=dl_exp_path, visible=have_rows),
        )

    def on_tag(text, lang_current):
        """Tag the input text and refresh every result component."""
        rows = run_model_multisentence(text)
        df_main, df_mean, overview = render(rows, lang_current)
        return (
            rows,
            gr.update(value=df_main, visible=True),
            gr.update(value=df_mean),
            gr.update(value=overview),
            gr.update(visible=True),
            *_language_and_download_updates(rows, lang_current),
            lang_current,
            gr.update(value="Marka / Tag", interactive=True),
        )

    def on_set_lang(rows, lang_value):
        """Re-render the existing rows in another UI language."""
        df_main, df_mean, overview = render(rows, lang_value)
        return (
            lang_value,
            gr.update(value=df_main),
            gr.update(value=df_mean),
            gr.update(value=overview),
            *_language_and_download_updates(rows, lang_value),
        )

    def on_set_fo(rows):
        return on_set_lang(rows, "fo")

    def on_set_en(rows):
        return on_set_lang(rows, "en")

    # Tag button: first swap in the loading state, then run the tagger.
    _evt = btn.click(
        show_loading,
        inputs=[lang_state],
        outputs=[out_df, btn_dl_main, btn_dl_exp, expanded_acc, out_mean_df, btn],
        queue=False,
    )
    _evt.then(
        on_tag,
        inputs=[inp, lang_state],
        outputs=[
            state, out_df, out_mean_df, overview_md, expanded_acc,
            btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
            btn_dl_main, btn_dl_exp, lang_state, btn
        ],
        queue=False,
    )

    # All four language buttons update the same set of components.
    for _lang_button, _lang_handler in (
        (btn_lang_fo_on, on_set_fo),
        (btn_lang_fo_off, on_set_fo),
        (btn_lang_en_on, on_set_en),
        (btn_lang_en_off, on_set_en),
    ):
        _lang_button.click(
            _lang_handler,
            inputs=[state],
            outputs=[
                lang_state, out_df, out_mean_df, overview_md,
                btn_lang_fo_on, btn_lang_fo_off, btn_lang_en_on, btn_lang_en_off,
                btn_dl_main, btn_dl_exp
            ],
            queue=False,
        )

if __name__ == "__main__":
    demo.launch()