"""Starter script for the LLM dataset membership inference task.

The script loads the example dataset, shows how to query the fine-tuned
classifiers, writes an example submission CSV and can optionally upload a
submission file to the evaluation server.
"""

import os
import sys

import pandas as pd
import requests
import torch
import torchvision.models as models
from transformers import AutoTokenizer, PreTrainedModel, AutoModelForSequenceClassification

import utils

# --------------------------------
# DATASET
# --------------------------------

"""
Dataset contents:

- 1000 subsets of text data, each subset stored under the key "subset_{i}" where i ranges from 0 to 999.

Each subset is a dictionary with:

    -"prompts": List of 100 prompts in the subset
    -"labels": Tensor of true labels for the prompts in the subset, has shape (100)
    -"subset_id": Integer ID of the subset (from 0 to 999)
"""

# Load the dataset
# NOTE: torch.load unpickles the file; only load dataset files you trust.
dataset = torch.load("datasets/fulltuning.pt")

# Example: Accessing subsets
subset_0 = dataset["subset_0"]

print("Subset 0 keys:", subset_0.keys())
print("Subset ID:", subset_0["subset_id"])
print("Labels length:", len(subset_0["labels"]))
print("First prompts:", subset_0["prompts"][:5])
print("First 5 labels:", subset_0["labels"][:5])

# --------------------------------
# QUERYING THE CLASSIFIER
# --------------------------------

# This code can be used to load and query the fully fine-tuned models.
# You also need the provided utils.py file.

#|---------------------------------------------------------------------------------------------------|
#| NOTE: "Missing or unexpected params" warnings are no reason for concern. They stem from the       |
#| fact that the model is first loaded without a classifier head, which is added afterwards.         |
#|---------------------------------------------------------------------------------------------------|

# Use this tokenizer for OLMO...
OLMO_TOKENIZER_NAME = "allenai/OLMo-1B-hf"
# ...and this one for Pythia
PYTHIA_TOKENIZER_NAME = "EleutherAI/pythia-410m"

TOKENIZER_NAMES = {
    "olmo": OLMO_TOKENIZER_NAME,
    "pythia": PYTHIA_TOKENIZER_NAME,
}

# Model family queried by the usage examples below; can be "olmo" or "pythia".
# The tokenizer is picked from this value so it always matches the model being
# queried (loading both tokenizers one after the other would leave only the
# last one active, regardless of the model type).
# NOTE: the example model paths below point at the OLMo checkpoints; adjust
# them as well if you switch to "pythia".
MODEL_TYPE = "olmo"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAMES[MODEL_TYPE], trust_remote_code=True)
tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
    # Fall back to the end-of-sequence token when the tokenizer has no pad token.
    tokenizer.pad_token = tokenizer.eos_token

# One entry per fine-tuning variant:
# (model path, loader function from utils, example prompt, custom forward or None).
USAGE_EXAMPLES = [
    # Usage example (fulltuning):
    (
        "models/olmo-fulltuning",
        utils.get_fulltuning_model,
        "I think, therefore I am.\n\nI am.",
        None,
    ),
    # Usage example (softprompt):
    (
        "models/olmo-softprompt",
        utils.get_peft_model,
        "I think, but do I exist?\n\nSince you think, you exist.",
        None,
    ),
    # Usage example (lora):
    (
        "models/olmo-lora",
        utils.get_peft_model,
        "Who am I?\n\nWhat am I?",
        None,
    ),
    # Usage example (lastlayer):
    (
        "models/olmo-lastlayer",
        utils.get_peft_model,
        "I love to exist!",
        None,
    ),
    # Usage example (prefix): this variant is queried through
    # utils.forward_peft_seqcls instead of calling the model directly.
    (
        "models/olmo-prefix",
        utils.get_peft_model,
        "I will exist yesterday.",
        utils.forward_peft_seqcls,
    ),
]

for model_path, load_model, example_prompt, custom_forward in USAGE_EXAMPLES:
    model = load_model(model_path, model_type=MODEL_TYPE)  # model_type can be "olmo" or "pythia"

    inputs = tokenizer(example_prompt, return_tensors="pt", truncation=True)
    # Move every tokenizer output onto the device the model lives on.
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    with torch.no_grad():
        if custom_forward is None:
            outputs = model(**inputs)
        else:
            outputs = custom_forward(model, **inputs)

    logits = outputs.logits
    print(f"Logits shape: {logits.shape}")
    print(f"Logits: {logits}")

# --------------------------------
# SUBMISSION FORMAT
# --------------------------------

"""
The submission must be a .csv file with the following format:

    -"type": Name of the model (e.g., "softprompt", "fulltuning", etc.)
    -"subset_id": ID of the subset (from 0 to 999, per type)
    -"membership": Membership score for each subset (float)
"""

# Example Submission:

NUM_SUBSETS = 1000  # subset ids range from 0 to 999 for every type

types = ["softprompt", "fulltuning", "lora", "lastlayer", "prefix"]

# Each type name repeated once per subset, in the order of `types`.
type_list = [t for t in types for _ in range(NUM_SUBSETS)]

# The id range 0..999 repeated once per type, aligned with `type_list`.
subset_ids = list(range(NUM_SUBSETS)) * len(types)

# Random placeholder scores; replace with real membership scores.
membership_scores = torch.rand(len(type_list)).tolist()

submission_df = pd.DataFrame({
    "type": type_list,
    "subset_id": subset_ids,
    "membership": membership_scores,
})
submission_df.to_csv("example_submission.csv", index=False)

# --------------------------------
# SUBMISSION PROCESS
# --------------------------------

"""
Example submission script for the LLM Dataset Membership Inference Task.

Submission Requirements (read carefully to avoid automatic rejection):

1. CSV FORMAT
----------------
- The file **must be a CSV** with extension `.csv`.
- It must contain **exactly three columns**, named:
      type, subset_id, membership
  → Column names must match exactly (lowercase, no extra spaces).
  → Column order does not matter, but all three must be present.

2. ROW COUNT AND IDENTIFIERS
-------------------------------
- Your file must contain **exactly 5000 rows**.
- Each row corresponds to one unique `subset_id`/`type` pair, with ids in the range **0–999** (inclusive).
- Every subset_id must appear **exactly once** for each type.
- Do **not** add, remove, or rename any IDs.
- Do **not** include duplicates or missing entries.
- The evaluator checks:
      subset_id.min() == 0
      subset_id.max() == 999
      subset_id.unique().size == 1000

3. MEMBERSHIP SCORES
----------------------
- The `membership` column must contain **numeric values** representing your model’s predicted confidence
  that the corresponding subset is a **member** of the training set.

  Examples of valid membership values:
  - Probabilities: values in [0.0, 1.0]
  - Raw model scores: any finite numeric values (will be ranked for TPR@FPR=0.05)

- Do **not** submit string labels like "yes"/"no" or "member"/"non-member".
- The evaluator converts your `membership` column to numeric using `pd.to_numeric()`.
  → Any non-numeric, NaN, or infinite entries will cause automatic rejection.

4. TECHNICAL LIMITS
----------------------
- Maximum file size: **20 MB**
- Encoding: UTF-8 recommended.
- Avoid extra columns, blank lines, or formulas.
- Ensure all values are numeric and finite.
- Supported data types: int, float (e.g., float32, float64)

5. VALIDATION SUMMARY
------------------------
Your submission will fail if:
- Columns don’t match exactly ("type", "subset_id", "membership")
- Row count differs from 5000
- Any type name is unexpected or not in the allowed set
- Any subset_id is missing, duplicated, or outside [0, 999] for any type
- Any membership value is NaN, Inf, or non-numeric
- File is too large or not a valid CSV

Two key metrics are computed:
  1. **ROC-AUC (Area Under the ROC Curve)** — measures overall discriminative ability.
  2. **TPR@FPR=0.05** — true positive rate when the false positive rate is at 5%.
"""

# NOTE(review): the endpoint uses plain HTTP, so the API key header is sent unencrypted.
BASE_URL = "http://35.192.205.84:80"
API_KEY = "YOUR_API_KEY_HERE"  # replace with your actual API key

TASK_ID = "14-llm-dataset-inference"
FILE_PATH = "Your-Submission-File.csv"  # replace with your actual file path

SUBMIT = False  # Set to True to enable submission


def die(msg):
    """Print ``msg`` to stderr and terminate the script with exit status 1."""
    print(f"{msg}", file=sys.stderr)
    sys.exit(1)


def submit_csv(file_path):
    """Upload ``file_path`` to the evaluation server and print the outcome.

    The file is posted as multipart form data to
    ``{BASE_URL}/submit/{TASK_ID}`` with the API key in the ``X-API-Key``
    header. On success the server response (and the submission id, if the
    response contains one) is printed.

    The process exits with status 1 when the file does not exist, when the
    server answers HTTP 413, or when the request fails / returns an error
    status.
    """
    if not os.path.isfile(file_path):
        die(f"File not found: {file_path}")

    try:
        with open(file_path, "rb") as f:
            files = {
                # (fieldname) -> (filename, fileobj, content_type)
                # NOTE(review): "csv" is not a registered MIME type ("text/csv" is);
                # kept unchanged in case the server matches on this exact value.
                "file": (os.path.basename(file_path), f, "csv"),
            }
            resp = requests.post(
                f"{BASE_URL}/submit/{TASK_ID}",
                headers={"X-API-Key": API_KEY},
                files=files,
                timeout=(10, 120),  # (connect timeout, read timeout)
            )

        # Helpful output even on non-2xx
        try:
            body = resp.json()
        except ValueError:
            # The response body is not valid JSON; keep the raw text instead.
            body = {"raw_text": resp.text}

        if resp.status_code == 413:
            die("Upload rejected: file too large (HTTP 413). Reduce size and try again.")

        resp.raise_for_status()

        # A JSON body is not necessarily an object, so only look up the id in a dict.
        submission_id = body.get("submission_id") if isinstance(body, dict) else None
        print("Successfully submitted.")
        print("Server response:", body)
        if submission_id:
            print(f"Submission ID: {submission_id}")

    except requests.exceptions.RequestException as e:
        detail = getattr(e, "response", None)
        print(f"Submission error: {e}")
        if detail is not None:
            try:
                print("Server response:", detail.json())
            except ValueError:
                print("Server response (text):", detail.text)
        sys.exit(1)


if SUBMIT:
    submit_csv(FILE_PATH)