| from datasets import load_dataset |
| import streamlit as st |
| from ast import literal_eval |
| import pandas as pd |
|
|
|
|
# Pipeline tags grouped by modality. Element order inside each list and key
# order inside ``modalities`` are kept stable because lookups iterate them
# in insertion order.
nlp_tasks = [
    "text-classification",
    "text-generation",
    "text2text-generation",
    "token-classification",
    "fill-mask",
    "question-answering",
    "translation",
    "conversational",
    "sentence-similarity",
    "summarization",
    "multiple-choice",
    "zero-shot-classification",
    "table-question-answering",
]

audio_tasks = [
    "automatic-speech-recognition",
    "audio-classification",
    "text-to-speech",
    "audio-to-audio",
    "voice-activity-detection",
]

cv_tasks = [
    "image-classification",
    "image-segmentation",
    "zero-shot-image-classification",
    "image-to-image",
    "unconditional-image-generation",
    "object-detection",
]

multimodal = [
    "feature-extraction",
    "text-to-image",
    "visual-question-answering",
    "image-to-text",
    "document-question-answering",
]

tabular = [
    "tabular-classification",
    "tabular-regression",
]

# Modality name -> list of pipeline tags that belong to it.
modalities = {
    "nlp": nlp_tasks,
    "audio": audio_tasks,
    "cv": cv_tasks,
    "multimodal": multimodal,
    "tabular": tabular,
    "rl": ["reinforcement-learning"],
}
|
|
def modality(row):
    """Return the modality name for a row's pipeline tag.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``"pipeline"``
            entry.

    Returns:
        The key of ``modalities`` whose task list contains the pipeline tag;
        ``"unk_modality"`` when the tag is a string that matches no known
        task; ``None`` when the tag is not a string (e.g. a missing value).
    """
    pipeline = row["pipeline"]
    for name, tasks in modalities.items():
        if pipeline in tasks:
            return name
    # The previous check compared ``type(pipeline)`` with the literal "str",
    # which is never equal, so unknown string tags silently became None.
    if isinstance(pipeline, str):
        return "unk_modality"
    return None
|
|
# The ``@`` was missing, so ``st.cache(...)`` was evaluated and discarded and
# the function below was never cached.
# NOTE(review): newer Streamlit releases deprecate ``st.cache`` in favour of
# ``st.cache_data`` -- confirm against the pinned Streamlit version.
@st.cache(allow_output_mutation=True)
def process_dataset(version):
    """Load one revision of the model-repos-stats dataset as a DataFrame.

    Args:
        version: Revision passed to ``load_dataset`` as ``revision``.

    Returns:
        The ``"train"`` split as a pandas DataFrame with two extra columns:
        ``"modality"`` (result of ``modality`` applied to each row) and
        ``"length_bins"`` (``"text_length"`` bucketed with ``pd.cut``).
    """
    dataset = load_dataset("open-source-metrics/model-repos-stats", revision=version)

    data = dataset["train"].to_pandas()

    # Row-wise mapping from pipeline tag to modality name.
    data["modality"] = data.apply(modality, axis=1)

    # With pd.cut's defaults the bins are right-closed, so lengths of 0 and
    # lengths above 50000 fall outside every bin and become NaN.
    data["length_bins"] = pd.cut(
        data["text_length"],
        [0, 200, 1000, 2000, 3000, 4000, 5000, 7500, 10000, 20000, 50000],
    )

    return data
|
|
def eval_tags(row):
    """Parse the ``"tags"`` entry of a row into a list of tags.

    Args:
        row: Mapping-like object with a ``"tags"`` entry. The value is
            normally a string: either a list literal (``"['a', 'b']"``),
            a single bare tag (``"a"``), or a placeholder (``"none"``,
            ``"{}"``).

    Returns:
        A list of tags. Placeholders, ``None``, empty values and dict
        literals yield ``[]``; a bare tag yields a one-element list.

    Raises:
        ValueError, SyntaxError: If a string starting with ``"["`` is not a
            valid Python literal.
    """
    tags = row["tags"]
    # ``None`` and "" used to crash on ``tags[0]``; treat them as "no tags".
    if tags is None or tags == "none" or tags == [] or tags == "" or tags == "{}":
        return []
    if not isinstance(tags, str):
        # Already a parsed collection: return it flat instead of nesting it.
        return list(tags)
    if tags.startswith("["):
        return literal_eval(tags)
    if tags.startswith("{"):
        # Dict literals carry no tags. Previously these strings were wrapped
        # as a single tag before parsing, so the dict check could never fire.
        try:
            parsed = literal_eval(tags)
        except (ValueError, SyntaxError, TypeError):
            return [tags]
        return [] if isinstance(parsed, dict) else [tags]
    # A bare tag: wrap directly rather than round-tripping through repr.
    return [tags]
|
|
def change_pct(old, new):
    """Return the change from ``old`` to ``new`` as a percentage of ``new``.

    The result is rounded to three decimals. When ``new`` is zero the
    division is undefined and the sentinel ``-10000000`` is returned instead.
    """
    if new != 0:
        difference = new - old
        return round(100 * difference / new, 3)
    return -10000000
|
|
def change_and_delta(old_old, old, new):
    """Return the latest percentage change and its shift versus the prior one.

    Args:
        old_old: Value two periods back.
        old: Value one period back.
        new: Current value.

    Returns:
        A ``(change, delta)`` pair of strings, both ending in ``"%"``.
        ``change`` is ``change_pct(old, new)``; ``delta`` is that value minus
        ``change_pct(old_old, old)``, rounded to three decimals and prefixed
        with ``"+"`` when positive.
    """
    curr_change = change_pct(old, new)
    prev_change = change_pct(old_old, old)
    delta = round(curr_change - prev_change, 3)
    # Always return a formatted string: previously only positive deltas were
    # formatted, so zero/negative deltas came back as bare floats without "%".
    sign = "+" if delta > 0 else ""
    return f"{curr_change}%", f"{sign}{delta}%"