| import pandas as pd |
| from pathlib import Path |
| from datasets import load_dataset |
| import numpy as np |
| import os |
| import re |
|
|
# Models whose results were not verified; model_hyperlink appends a trailing
# " *" marker to these names in the rendered leaderboard.
UNVERIFIED_MODELS = [


]


# Models flagged as contaminated; model_hyperlink appends a trailing " ⚠️"
# marker to these names in the rendered leaderboard.
CONTAMINATED_MODELS = [


]
|
|
| |
def model_hyperlink(link, model_name):
    """Render a model name as an HTML anchor (or plain text) for the leaderboard.

    Known organizations get a hard-coded Hugging Face hub link; everything else
    links to `link`. Names listed in UNVERIFIED_MODELS / CONTAMINATED_MODELS get
    a trailing marker appended.

    Args:
        link: Fallback URL used for models with no special-cased organization.
        model_name: Display name, typically "org/model".

    Returns:
        An HTML string (or plain text for the special-cased entries).
    """
    # Keep table cells readable: truncate very long names for display.
    # NOTE: truncation happens first, so the org prefix and the
    # UNVERIFIED/CONTAMINATED membership checks below see the truncated name.
    if len(model_name) > 50:
        model_name = model_name[:47] + "..."

    # Shared inline style for every anchor (byte-identical to the original).
    style = "color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"
    org = model_name.split("/")[0]

    if model_name == "random":
        output = "random"
    elif model_name == "Cohere March 2024":
        output = f'<a target="_blank" href="https://huggingface.co/Cohere" style="{style}">{model_name}</a>'
    elif org == "openai":
        output = f'<a target="_blank" href="https://huggingface.co/openai" style="{style}">{model_name}</a>'
    elif org == "Anthropic":
        output = f'<a target="_blank" href="https://huggingface.co/Anthropic" style="{style}">{model_name}</a>'
    elif org == "google":
        output = f'<a target="_blank" href="https://huggingface.co/google" style="{style}">{model_name}</a>'
    elif org == "PoLL":
        # PoLL entries have no hub page; show plain text.
        output = model_name
    else:
        # BUG FIX: this assignment was previously unconditional, clobbering
        # every special case above. It now applies only to the generic case.
        output = f'<a target="_blank" href="{link}" style="{style}">{model_name}</a>'

    # Flag models per leaderboard policy.
    if model_name in UNVERIFIED_MODELS:
        output += " *"
    if model_name in CONTAMINATED_MODELS:
        output += " ⚠️"
    return output
|
|
def undo_hyperlink(html_string):
    """Extract the visible text from an HTML anchor string.

    Returns the first run of characters found between a '>' and the next '<',
    or the literal string "No text found" when no such span exists.
    """
    found = re.search(r'>[^<]+<', html_string)
    if found is None:
        return "No text found"
    # Strip the surrounding '>' and '<' delimiters from the matched span.
    return found.group(0)[1:-1]
|
|
|
|
| |
def load_all_data(data_repo, subdir: str, subsubsets=False):
    """Load every per-model results JSON under `data_repo/subdir` into one DataFrame.

    Each "<model>.json" file is read via `datasets.load_dataset` and tagged
    with a "model" column derived from its filename.

    Args:
        data_repo: Root directory of the results repository.
        subdir: Subdirectory containing the per-model JSON files.
        subsubsets: Unused; kept for interface compatibility.

    Returns:
        A single concatenated DataFrame (empty if no JSON files are found).
    """
    data_dir = Path(data_repo) / subdir

    # One "<model_name>.json" results file per model.
    model_names = [f.split(".json")[0] for f in os.listdir(data_dir)
                   if os.path.isfile(os.path.join(data_dir, f)) and f.endswith(".json")]

    # Collect one frame per model and concatenate ONCE at the end: calling
    # pd.concat inside the loop (as before) copies the accumulated frame on
    # every iteration, which is quadratic in the number of files.
    frames = []
    for model_name in model_names:
        model_data = load_dataset("json", data_files=os.path.join(data_dir, model_name + ".json"), split="train")
        # Tag the rows with the model they came from (filename stem).
        # NOTE(review): add_column is given a length-1 list, so this assumes
        # exactly one row per JSON file — confirm against the data layout.
        model_data = model_data.add_column("model", [model_name])
        frames.append(pd.DataFrame(model_data))

    if not frames:
        return pd.DataFrame()
    # The original loop prepended each new frame (pd.concat([df2, df]));
    # reverse here to preserve the resulting row order.
    return pd.concat(frames[::-1])
|
|
|
|
def prep_df(df):
    """Prepare the raw results frame for display.

    Orders columns, renders model names as HTML links, scales scores to
    percentages, and applies human-friendly column headers. Expects the
    frame to contain at least "model", "path", and "average" columns.
    """
    # Alphabetical column order as a stable baseline.
    df = df.reindex(sorted(df.columns), axis=1)

    # Put "model" in the first position.
    ordering = list(df.columns)
    ordering.insert(0, ordering.pop(ordering.index("model")))
    df = df.loc[:, ordering]

    # Replace each bare model name with an HTML link built from its hub
    # path, then drop the now-redundant "path" column.
    df["model"] = df.apply(
        lambda row: model_hyperlink(f"https://huggingface.co/{row['path']}", row["model"]),
        axis=1,
    )
    df = df.drop(columns=["path"])

    # Scores are stored as fractions; display them as percentages.
    # Rank and confidence-interval columns are left untouched.
    score_cols = [
        c for c in df.columns
        if c != "model" and "rank" not in c and "confi" not in c
    ]
    df[score_cols] = df[score_cols] * 100

    # Put "average" right after "model".
    ordering = list(df.columns)
    ordering.insert(1, ordering.pop(ordering.index("average")))
    df = df.loc[:, ordering]

    # Human-friendly column headers.
    df = df.rename(columns={
        "model": "Model",
        "average": "Average",
        "brainstorm": "Brainstorm",
        "open_qa": "Open QA",
        "closed_qa": "Closed QA",
        "extract": "Extract",
        "generation": "Generation",
        "rewrite": "Rewrite",
        "summarize": "Summarize",
        "classify": "Classify",
        "reasoning_over_numerical_data": "Reasoning Over Numerical Data",
        "multi-document_synthesis": "Multi-Document Synthesis",
        "fact_checking_or_attributed_qa": "Fact Checking or Attributed QA",
    })

    return df
|
|
|
|
def sort_by_category(df, category):
    """Sort the leaderboard by one category's precomputed rank.

    Rows are ordered by "<category>_rank" ascending, tie-broken by the
    category score descending. The rank column is moved to the front and
    renamed "Rank", the category score to position 2, and the category's
    confidence-interval column to position 3 (renamed "95% CI"). All other
    per-category rank/confidence columns are dropped from the view.
    """
    slug = category.lower().replace(" ", "_")
    rank_col = slug + "_rank"
    ci_col = slug + "_confi"

    out = df.copy()
    # Primary: precomputed rank (ascending). Secondary: raw score (descending).
    out = out.sort_values(by=[rank_col, category], ascending=[True, False])

    def _move(frame, col, pos):
        # Reposition `col` to index `pos`, leaving all other columns in order.
        order = list(frame.columns)
        order.insert(pos, order.pop(order.index(col)))
        return frame.loc[:, order]

    out = _move(out, rank_col, 0).rename(columns={rank_col: "Rank"})
    out = _move(out, category, 2)
    out = _move(out, ci_col, 3).rename(columns={ci_col: "95% CI"})

    # Drop the remaining per-category rank/CI columns. The checks are
    # case-sensitive, so the renamed "Rank" column survives.
    out = out.drop(columns=[c for c in out.columns if c.endswith("rank")])
    out = out.drop(columns=[c for c in out.columns if c.endswith("confi")])

    return out