Quentin Mace committed on
Commit
8b35e3a
·
1 Parent(s): b62a21e
Files changed (3) hide show
  1. app.py +9 -7
  2. app/utils.py +14 -6
  3. data/pipeline_handler.py +3 -3
app.py CHANGED
@@ -40,13 +40,14 @@ def main():
40
 
41
  deprecated_model_handler = DeprecatedModelHandler()
42
  initial_metric = "ndcg_at_5"
 
43
 
44
  # Get pipeline evaluation results
45
  pipeline_handler = PipelineHandler()
46
  pipeline_handler.get_pipeline_data()
47
  initial_language = "overall"
48
- data_pipeline = pipeline_handler.render_df(initial_metric, initial_language)
49
- data_pipeline = add_rank_and_format(data_pipeline, benchmark_version=3)
50
 
51
  num_datasets_pipeline = len(data_pipeline.columns) - 4 # Excluding Rank, Model, QPS, Average
52
  num_scores_pipeline = len(data_pipeline) * num_datasets_pipeline
@@ -133,7 +134,7 @@ def main():
133
  ></iframe>
134
  """
135
  )
136
- with gr.TabItem("ViDoRe V3 (Pipeline Eval)"):
137
  gr.Markdown("# ViDoRe V3 (Pipeline Evaluation): Retrieval Performance for complex pipelines🔍⚙️")
138
  gr.Markdown("### Complete pipeline evaluation including compute costs and timing metrics")
139
 
@@ -157,7 +158,7 @@ def main():
157
  language_choices.append((lang.capitalize(), lang))
158
 
159
  with gr.Row():
160
- metric_dropdown_pipeline = gr.Dropdown(choices=METRICS, value=initial_metric, label="Select Metric")
161
  language_dropdown_pipeline = gr.Dropdown(
162
  choices=language_choices,
163
  value="overall",
@@ -191,7 +192,7 @@ def main():
191
  def update_data_pipeline(metric, language, search_term, selected_columns):
192
  pipeline_handler.get_pipeline_data()
193
  data = pipeline_handler.render_df(metric, language)
194
- data = add_rank_and_format(data, benchmark_version=3, selected_columns=selected_columns)
195
  data = filter_models(data, search_term)
196
  if selected_columns:
197
  # Include core columns plus selected dataset columns
@@ -206,7 +207,8 @@ def main():
206
  refresh_button_pipeline.click(
207
  lambda metric, language: add_rank_and_format(
208
  pipeline_handler.render_df(metric, language),
209
- benchmark_version=3
 
210
  ),
211
  inputs=[metric_dropdown_pipeline, language_dropdown_pipeline],
212
  outputs=dataframe_pipeline,
@@ -224,7 +226,7 @@ def main():
224
  def refresh_pipeline_data(metric, language):
225
  """Refresh pipeline data when metric or language changes."""
226
  df = pipeline_handler.render_df(metric, language)
227
- return add_rank_and_format(df, benchmark_version=3)
228
 
229
  metric_dropdown_pipeline.change(
230
  refresh_pipeline_data,
 
40
 
41
  deprecated_model_handler = DeprecatedModelHandler()
42
  initial_metric = "ndcg_at_5"
43
+ initial_metric_v3 = "ndcg_at_10"
44
 
45
  # Get pipeline evaluation results
46
  pipeline_handler = PipelineHandler()
47
  pipeline_handler.get_pipeline_data()
48
  initial_language = "overall"
49
+ data_pipeline = pipeline_handler.render_df(initial_metric_v3, initial_language)
50
+ data_pipeline = add_rank_and_format(data_pipeline, benchmark_version=3, is_pipeline=True)
51
 
52
  num_datasets_pipeline = len(data_pipeline.columns) - 4 # Excluding Rank, Model, QPS, Average
53
  num_scores_pipeline = len(data_pipeline) * num_datasets_pipeline
 
134
  ></iframe>
135
  """
136
  )
137
+ with gr.TabItem("ViDoRe V3 (Pipeline)"):
138
  gr.Markdown("# ViDoRe V3 (Pipeline Evaluation): Retrieval Performance for complex pipelines🔍⚙️")
139
  gr.Markdown("### Complete pipeline evaluation including compute costs and timing metrics")
140
 
 
158
  language_choices.append((lang.capitalize(), lang))
159
 
160
  with gr.Row():
161
+ metric_dropdown_pipeline = gr.Dropdown(choices=METRICS, value=initial_metric_v3, label="Select Metric")
162
  language_dropdown_pipeline = gr.Dropdown(
163
  choices=language_choices,
164
  value="overall",
 
192
  def update_data_pipeline(metric, language, search_term, selected_columns):
193
  pipeline_handler.get_pipeline_data()
194
  data = pipeline_handler.render_df(metric, language)
195
+ data = add_rank_and_format(data, benchmark_version=3, selected_columns=selected_columns, is_pipeline=True)
196
  data = filter_models(data, search_term)
197
  if selected_columns:
198
  # Include core columns plus selected dataset columns
 
207
  refresh_button_pipeline.click(
208
  lambda metric, language: add_rank_and_format(
209
  pipeline_handler.render_df(metric, language),
210
+ benchmark_version=3,
211
+ is_pipeline=True
212
  ),
213
  inputs=[metric_dropdown_pipeline, language_dropdown_pipeline],
214
  outputs=dataframe_pipeline,
 
226
  def refresh_pipeline_data(metric, language):
227
  """Refresh pipeline data when metric or language changes."""
228
  df = pipeline_handler.render_df(metric, language)
229
+ return add_rank_and_format(df, benchmark_version=3, is_pipeline=True)
230
 
231
  metric_dropdown_pipeline.change(
232
  refresh_pipeline_data,
app/utils.py CHANGED
@@ -1,7 +1,14 @@
1
 
2
 
3
- def make_clickable_model(model_name, link=None):
4
- if link is None:
 
 
 
 
 
 
 
5
  desanitized_model_name = model_name.replace("__", "/")
6
  desanitized_model_name = desanitized_model_name.replace("_", "/")
7
  desanitized_model_name = desanitized_model_name.replace("-thisisapoint-", ".")
@@ -11,7 +18,8 @@ def make_clickable_model(model_name, link=None):
11
  if "/ocr" in desanitized_model_name:
12
  desanitized_model_name = desanitized_model_name.replace("/ocr", "")
13
 
14
- link = "https://huggingface.co/" + desanitized_model_name
 
15
 
16
  return f'<a target="_blank" style="text-decoration: underline" href="{link}">{desanitized_model_name}</a>'
17
 
@@ -51,11 +59,11 @@ def add_rank(df, benchmark_version=1, selected_columns=None):
51
  return df
52
 
53
 
54
- def add_rank_and_format(df, benchmark_version=1, selected_columns=None):
55
  df = df.reset_index()
56
  df = df.rename(columns={"index": "Model"})
57
  df = add_rank(df, benchmark_version, selected_columns)
58
- df["Model"] = df["Model"].apply(make_clickable_model)
59
  # df = remove_duplicates(df)
60
  return df
61
 
@@ -92,7 +100,7 @@ def get_pipeline_refresh_function(pipeline_handler):
92
  def _refresh(metric):
93
  pipeline_handler.get_pipeline_data()
94
  data = pipeline_handler.render_df(metric)
95
- df = add_rank_and_format(data, benchmark_version=3)
96
  return df
97
 
98
  return _refresh
 
1
 
2
 
3
+ def make_clickable_model(model_name, link=None, is_pipeline=False):
4
+ if is_pipeline:
5
+ # For pipelines: keep underscores as-is, only process __ and -thisisapoint-
6
+ desanitized_model_name = model_name.replace("__", "/")
7
+ desanitized_model_name = desanitized_model_name.replace("-thisisapoint-", ".")
8
+ if link is None:
9
+ link = f"https://github.com/illuin-tech/vidore-benchmark/blob/vidore_v3_pipeline/results/pipeline_descriptions/{desanitized_model_name}/description.json"
10
+ else:
11
+ # For regular models: replace __ and _ with /, and -thisisapoint- with .
12
  desanitized_model_name = model_name.replace("__", "/")
13
  desanitized_model_name = desanitized_model_name.replace("_", "/")
14
  desanitized_model_name = desanitized_model_name.replace("-thisisapoint-", ".")
 
18
  if "/ocr" in desanitized_model_name:
19
  desanitized_model_name = desanitized_model_name.replace("/ocr", "")
20
 
21
+ if link is None:
22
+ link = "https://huggingface.co/" + desanitized_model_name
23
 
24
  return f'<a target="_blank" style="text-decoration: underline" href="{link}">{desanitized_model_name}</a>'
25
 
 
59
  return df
60
 
61
 
62
+ def add_rank_and_format(df, benchmark_version=1, selected_columns=None, is_pipeline=False):
63
  df = df.reset_index()
64
  df = df.rename(columns={"index": "Model"})
65
  df = add_rank(df, benchmark_version, selected_columns)
66
+ df["Model"] = df["Model"].apply(lambda x: make_clickable_model(x, is_pipeline=is_pipeline))
67
  # df = remove_duplicates(df)
68
  return df
69
 
 
100
  def _refresh(metric):
101
  pipeline_handler.get_pipeline_data()
102
  data = pipeline_handler.render_df(metric)
103
+ df = add_rank_and_format(data, benchmark_version=3, is_pipeline=True)
104
  return df
105
 
106
  return _refresh
data/pipeline_handler.py CHANGED
@@ -10,7 +10,7 @@ class PipelineHandler:
10
 
11
  def __init__(self):
12
  self.pipeline_infos = {}
13
- self.github_base_url = "https://raw.githubusercontent.com/illuin-tech/vidore-benchmark/vidore_v3_pipeline/results"
14
  self.available_datasets = []
15
  self.available_languages = ["overall"] # Default languages available
16
 
@@ -23,7 +23,7 @@ class PipelineHandler:
23
 
24
  def get_pipeline_folders_from_github(self) -> List[str]:
25
  """Get list of pipeline folders from GitHub API."""
26
- api_url = "https://api.github.com/repos/illuin-tech/vidore-benchmark/contents/results?ref=vidore_v3_pipeline"
27
 
28
  try:
29
  response = requests.get(api_url, headers=self.headers)
@@ -39,7 +39,7 @@ class PipelineHandler:
39
 
40
  def get_dataset_files_from_github(self, pipeline_name: str) -> List[str]:
41
  """Get list of dataset JSON files for a specific pipeline from GitHub API."""
42
- api_url = f"https://api.github.com/repos/illuin-tech/vidore-benchmark/contents/results/{pipeline_name}?ref=vidore_v3_pipeline"
43
 
44
  try:
45
  response = requests.get(api_url, headers=self.headers)
 
10
 
11
  def __init__(self):
12
  self.pipeline_infos = {}
13
+ self.github_base_url = "https://raw.githubusercontent.com/illuin-tech/vidore-benchmark/vidore_v3_pipeline/results/metrics"
14
  self.available_datasets = []
15
  self.available_languages = ["overall"] # Default languages available
16
 
 
23
 
24
  def get_pipeline_folders_from_github(self) -> List[str]:
25
  """Get list of pipeline folders from GitHub API."""
26
+ api_url = "https://api.github.com/repos/illuin-tech/vidore-benchmark/contents/results/metrics?ref=vidore_v3_pipeline"
27
 
28
  try:
29
  response = requests.get(api_url, headers=self.headers)
 
39
 
40
  def get_dataset_files_from_github(self, pipeline_name: str) -> List[str]:
41
  """Get list of dataset JSON files for a specific pipeline from GitHub API."""
42
+ api_url = f"https://api.github.com/repos/illuin-tech/vidore-benchmark/contents/results/metrics/{pipeline_name}?ref=vidore_v3_pipeline"
43
 
44
  try:
45
  response = requests.get(api_url, headers=self.headers)