import csv
import html

import gradio as gr
import numpy as np
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentencex import segment
from usearch.index import Index
from usearch.index import search, MetricKind, Matches, BatchMatches
|
|
# Opening HTML/CSS preamble for the rendered output. It styles a `.tooltip`
# container whose `.tooltiptext` child stays hidden until the container is
# hovered, and it deliberately leaves `<body>` open: the caller appends the
# per-sentence markup and the closing `</body></html>` tags.
#
# The tooltip box is positioned below its text (`top: 125%`) and the arrow
# sits on the box's top edge (`bottom: 100%`), so the arrow colours its
# *bottom* border to form a triangle pointing up at the hovered text.
#
# NOTE(review): `width: "100%"` is quoted, which is not a valid CSS width
# value, so browsers presumably drop that declaration. It is left untouched
# here to avoid changing the current tooltip sizing -- confirm the intent.
HTML_Output = """<html><head><style>/* Tooltip container */
.tooltip {
position: relative;
width: 600px;
display: inline-block;
border-bottom: 1px dotted black; /* If you want dots under the hoverable text */
}

/* Tooltip text */
.tooltip .tooltiptext {
visibility: hidden;
width: "100%";
background-color: #555;
color: #34e1eb;
text-align: center;
padding: 5px 0;
border-radius: 6px;

/* Position the tooltip text */
position: absolute;
z-index: 1;
top: 125%;
left: 50%;
margin-left: -60px;

/* Fade in tooltip */
opacity: 0;
transition: opacity 0.3s;
}

/* Tooltip arrow */
.tooltip .tooltiptext::before {
content: "";
position: absolute;
bottom: 100%;
left: 50%;
margin-left: -5px;
border-width: 5px;
border-style: solid;
border-color: transparent transparent #555 transparent;
}

/* Show the tooltip text when you mouse over the tooltip container */
.tooltip:hover .tooltiptext {
visibility: visible;
opacity: 1;
}</style></head><body>"""
|
|
# Sentence-embedding model used for both the label list and incoming text.
# `trust_remote_code=True` lets the model repository run its own Python code
# at load time.
model = SentenceTransformer("Corran/SciGenNomicEmbed", trust_remote_code=True)

# Rhetoric-function labels (the `rhetoric_function` column of the `train`
# split), materialised as a plain list so matches can index into it.
rf = list(load_dataset("Corran/RhetoricFunctionsList")["train"]["rhetoric_function"])

# Label embeddings, computed once at import time and reused by every search.
rf_emb = model.encode(rf)
|
|
|
|
def get_matches(inputs, *, top_k: int = 3):
    """Return the nearest rhetoric-function labels for each input sentence.

    Every sentence is embedded with the module-level ``model`` and compared
    against the precomputed label embeddings ``rf_emb`` using an exact
    (brute-force) squared-L2 search.

    Args:
        inputs: Iterable of sentence strings.
        top_k: How many nearest labels to keep per sentence. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        A list with one entry per sentence, in input order. Each entry is a
        list of ``(label, distance)`` tuples, where ``label`` is looked up in
        ``rf`` by the match key and ``distance`` is the match distance rounded
        to two decimals and converted to ``str``.
    """
    paragraph_matches = []
    for sentence in inputs:
        # Sentences are encoded and searched one at a time, so each `search`
        # call receives a single query vector and its result is iterated as
        # the matches for that one sentence.
        embedding = model.encode(sentence, batch_size=128)
        matches = search(rf_emb, embedding, top_k, MetricKind.L2sq, exact=True)
        paragraph_matches.append(
            [(rf[match.key], str(round(match.distance, 2))) for match in matches]
        )
    return paragraph_matches
|
|
|
|
def return_rf_scores(abstract):
    """Render an abstract as HTML with per-sentence rhetoric-function tooltips.

    The abstract is split into English sentences with ``segment``; each
    sentence is wrapped in a ``.tooltip`` div whose hidden ``.tooltiptext``
    span lists the labels and distances returned by ``get_matches``.

    Args:
        abstract: Raw text entered by the user.

    Returns:
        A complete HTML document string: ``HTML_Output`` followed by one
        tooltip block per sentence and the closing ``</body></html>`` tags.
        Empty or whitespace-only input yields the document with no blocks.
    """
    if not abstract or not abstract.strip():
        # Nothing to segment or embed; still hand back well-formed HTML.
        return HTML_Output + "</body></html>"

    sentences = list(segment("en", abstract))
    matches = get_matches(sentences)

    parts = [HTML_Output]
    for sentence, sentence_matches in zip(sentences, matches):
        # Both the user's text and the dataset labels are escaped so that
        # characters such as `<` or `&` cannot break or inject markup.
        tooltip = "\n".join(
            f"{html.escape(str(label))} : {html.escape(str(distance))}<br>"
            for label, distance in sentence_matches
        )
        parts.append(
            f'<div class="tooltip">{html.escape(sentence)}\n'
            f'<span class="tooltiptext">{tooltip}</span>\n'
            "</div><br>"
        )
    parts.append("</body></html>")

    return "".join(parts)
|
|
# Example inputs for the UI, one per row of a tab-separated file.
# `newline=""` lets the csv module handle line endings itself (so quoted
# fields containing newlines parse correctly), and the explicit encoding
# avoids depending on the platform default. Blank lines come back from the
# reader as empty rows and are skipped, since an example needs a value.
with open("examples.tsv", "r", newline="", encoding="utf-8") as ex:
    examples = [
        row for row in csv.reader(ex, delimiter="\t", quotechar='"') if row
    ]


# Single text input -> HTML output, served by `return_rf_scores`.
demo = gr.Interface(
    fn=return_rf_scores,
    inputs="text",
    outputs="html",
    examples=examples,
)
demo.launch()
|
|