import json
import os

import gradio as gr

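# Paths to the JSON files for each SCALAR difficulty split.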
datasets = {}
difficulty_files = {
    "Easy": "data/scalar_easy.json",
    "Medium": "data/scalar_medium.json",
    "Hard": "data/scalar_hard.json"
}

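# Load every difficulty split into memory at startup.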
for difficulty, filepath in difficulty_files.items():
    with open(filepath) as f:
        datasets[difficulty] = json.load(f)

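# Viewer state: the currently selected split and item index.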
current_index = 0
current_difficulty = "Easy"
current_data = datasets[current_difficulty]


def update_display(index, difficulty=None):
    global current_index, current_difficulty, current_data

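    # Switching difficulty resets the viewer to the first item of the new split.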
    if difficulty is not None and difficulty != current_difficulty:
        current_difficulty = difficulty
        current_data = datasets[difficulty]
        current_index = 0
        index = 0

    current_index = index
    item = current_data[current_index]

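    # Escape angle brackets so the question renders as literal text rather than HTML.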
    question = item['question'].replace("<", "&lt;").replace(">", "&gt;")

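    # Highlight the [MASKED_CITATION] placeholder so it stands out in the question.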
    colored_question = question.replace(
        "[MASKED_CITATION]",
        '<span style="background-color: #ffebeb; padding: 2px 6px; border-radius: 4px; color: blue;">[MASKED_CITATION]</span>'
    )

    formatted_citation = json.dumps(item["citation"], indent=4, ensure_ascii=False)
    return colored_question, formatted_citation, item["reference_id"], item["label_id"], index


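# "Previous"/"Next" navigation; the index wraps around at either end of the split.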
def prev_item(difficulty):
    global current_index, current_data
    current_data = datasets[difficulty]
    current_index = (current_index - 1) % len(current_data)
    return update_display(current_index, difficulty)


def next_item(difficulty):
    global current_index, current_data
    current_data = datasets[difficulty]
    current_index = (current_index + 1) % len(current_data)
    return update_display(current_index, difficulty)


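# Build the Gradio UI: controls and citation metadata on top, the question text below.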
with gr.Blocks() as demo:
    gr.Markdown("""
    ## Data viewer for [SCALAR](https://arxiv.org/abs/2502.13753/)
    * SCALAR (Scientific Citation-based Live Assessment of Long-context Academic Reasoning) is a benchmark that leverages academic papers and their citation networks. It features automatic generation of high-quality ground-truth labels without human annotation, controllable difficulty levels, and a dynamic updating mechanism that prevents data contamination.
    * The benchmark provides a reliable and sustainable way to track progress in long-context understanding as LLM capabilities evolve.
    """)
    with gr.Column():
        with gr.Row():
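            # Left column: difficulty selector, index slider, and navigation buttons.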
            with gr.Column():
                difficulty_dropdown = gr.Dropdown(
                    choices=list(difficulty_files.keys()),
                    value="Easy",
                    label="Difficulty Level"
                )

                index_slider = gr.Slider(minimum=0, maximum=len(datasets["Easy"]) - 1, step=1, label="Select Index", value=0)
                with gr.Row():
                    prev_btn = gr.Button("Previous")
                    next_btn = gr.Button("Next")

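            # Right column: citation text and identifiers for the current item.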
            with gr.Column():
                citation_text = gr.TextArea(label="Citation", interactive=False)
                reference_id = gr.Textbox(label="Reference ID", interactive=False)
                label_id = gr.Textbox(label="Label ID", interactive=False)

        with gr.Row():
            question_md = gr.Markdown(label="Question")

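    # Changing the difficulty rebuilds the slider so its range matches the new split.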
    difficulty_dropdown.change(
        fn=lambda d: gr.Slider(minimum=0, maximum=len(datasets[d]) - 1, step=1, value=0),
        inputs=[difficulty_dropdown],
        outputs=[index_slider]
    )

    index_slider.change(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

    prev_btn.click(
        prev_item,
        inputs=[difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

    next_btn.click(
        next_item,
        inputs=[difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )

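    # Populate the viewer with the first item when the page first loads.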
    demo.load(
        update_display,
        inputs=[index_slider, difficulty_dropdown],
        outputs=[question_md, citation_text, reference_id, label_id, index_slider]
    )


demo.launch()