import torch
import pandas as pd
import requests
import sys
import torchvision.models as models
import os
from transformers import AutoTokenizer, PreTrainedModel, AutoModelForSequenceClassification
import utils
# --------------------------------
# DATASET
# --------------------------------
"""
Dataset contents:
- 1000 subsets of text data, each subset stored under the key "subset_{i}" where i ranges from 0 to 999.
Each subset is a dictionary with:
-"prompts": List of 100 prompts in the subset
-"labels": Tensor of true labels for the prompts in the subset, has shape (100)
-"subset_id": Integer ID of the subset (from 0 to 999)
"""
# Load the dataset.
# NOTE(review): torch.load unpickles the file. Depending on the installed
# PyTorch version's `weights_only` default this can execute code embedded in
# the file, so only load datasets obtained from a trusted source.
dataset = torch.load("datasets/fulltuning.pt")

# Example: accessing a subset and inspecting its fields
subset_0 = dataset["subset_0"]
print("Subset 0 keys:", subset_0.keys())
print("Subset ID:", subset_0["subset_id"])

labels = subset_0["labels"]
print("Labels length:", len(labels))

prompts = subset_0["prompts"]
print("First prompts:", prompts[:5])
print("First 5 labels:", labels[:5])
# --------------------------------
# QUERYING THE CLASSIFIER
# --------------------------------
# This code can be used to load and query the fully fine-tuned models. You also need the accompanying utils.py file.
#|---------------------------------------------------------------------------------------------------|
#| NOTE: "Missing or unexpected params" warnings are no reason for concern. They stem from the |
#| fact that the model is first loaded without a classifier head, which is added afterwards. |
#|---------------------------------------------------------------------------------------------------|
# Each model family has its own tokenizer. Select it by family name so that
# exactly one tokenizer is loaded and it matches the checkpoints being queried;
# loading both in sequence would leave only the last one bound to `tokenizer`.
TOKENIZER_NAMES = {
    "olmo": "allenai/OLMo-1B-hf",
    "pythia": "EleutherAI/pythia-410m",
}
# "olmo" or "pythia". The usage examples in this script load OLMo checkpoints,
# so OLMo is the default; switch this together with the `model_type` arguments.
MODEL_TYPE = "olmo"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAMES[MODEL_TYPE], trust_remote_code=True)
tokenizer.padding_side = "left"
if tokenizer.pad_token is None:
    # No dedicated padding token is defined: fall back to the EOS token.
    tokenizer.pad_token = tokenizer.eos_token
# Usage example (fulltuning): load the checkpoint and print the logits for one prompt.
model_path = "models/olmo-fulltuning"
example_prompt = "I think, therefore I am.\n\nI am."
model = utils.get_fulltuning_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

# Tokenize the prompt, then move every returned tensor to the model's device.
encoded = tokenizer(example_prompt, return_tensors="pt", truncation=True)
device = model.device
inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

# Forward pass only; gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
# Usage example (softprompt): load the checkpoint and print the logits for one prompt.
model_path = "models/olmo-softprompt"
example_prompt = "I think, but do I exist?\n\nSince you think, you exist."
model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

# Tokenize the prompt, then move every returned tensor to the model's device.
encoded = tokenizer(example_prompt, return_tensors="pt", truncation=True)
device = model.device
inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

# Forward pass only; gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
# Usage example (lora): load the checkpoint and print the logits for one prompt.
model_path = "models/olmo-lora"
example_prompt = "Who am I?\n\nWhat am I?"
model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

# Tokenize the prompt, then move every returned tensor to the model's device.
encoded = tokenizer(example_prompt, return_tensors="pt", truncation=True)
device = model.device
inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

# Forward pass only; gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
# Usage example (lastlayer): load the checkpoint and print the logits for one prompt.
model_path = "models/olmo-lastlayer"
example_prompt = "I love to exist!"
model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

# Tokenize the prompt, then move every returned tensor to the model's device.
encoded = tokenizer(example_prompt, return_tensors="pt", truncation=True)
device = model.device
inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

# Forward pass only; gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
# Usage example (prefix): load the checkpoint and print the logits for one prompt.
# Unlike the other examples, the forward pass goes through
# utils.forward_peft_seqcls instead of calling the model directly.
model_path = "models/olmo-prefix"
example_prompt = "I will exist yesterday."
model = utils.get_peft_model(model_path, model_type="olmo")  # model_type can be "olmo" or "pythia"

# Tokenize the prompt, then move every returned tensor to the model's device.
encoded = tokenizer(example_prompt, return_tensors="pt", truncation=True)
device = model.device
inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

# Forward pass only; gradient tracking is switched off.
with torch.no_grad():
    outputs = utils.forward_peft_seqcls(model, **inputs)
logits = outputs.logits

print(f"Logits shape: {logits.shape}")
print(f"Logits: {logits}")
# --------------------------------
# SUBMISSION FORMAT
# --------------------------------
"""
The submission must be a .csv file with the following format:
-"type": Name of the model (e.g., "softprompt", "fulltuning", etc.)
-"subset_id": ID of the subset (from 0 to 999, per type)
-"membership": Membership score for each subset (float)
"""
# Example submission: one row per (type, subset_id) pair with a random
# placeholder score, written to example_submission.csv.
types = ["softprompt", "fulltuning", "lora", "lastlayer", "prefix"]
SUBSETS_PER_TYPE = 1000  # subset ids run from 0 to 999 for every type

# Each type name repeated once per subset, in the order given by `types`.
type_list = [t for t in types for _ in range(SUBSETS_PER_TYPE)]
# The id range restarts at 0 for every type so it lines up with `type_list`.
subset_ids = list(range(SUBSETS_PER_TYPE)) * len(types)
# Random scores in [0, 1); sized from the id list so the three columns cannot
# drift apart if the number of types or subsets changes.
membership_scores = torch.rand(len(subset_ids)).tolist()

submission_df = pd.DataFrame({
    "type": type_list,
    "subset_id": subset_ids,
    "membership": membership_scores,
})
# index=False keeps the DataFrame index out of the file, so the CSV contains
# exactly the three required columns.
submission_df.to_csv("example_submission.csv", index=False)
# --------------------------------
# SUBMISSION PROCESS
# --------------------------------
"""
Example submission script for the LLM Dataset Membership Inference Task.
Submission Requirements (read carefully to avoid automatic rejection):
1. CSV FORMAT
----------------
- The file **must be a CSV** with extension `.csv`.
- It must contain **exactly three columns**, named:
type, subset_id, membership
→ Column names must match exactly (lowercase, no extra spaces).
→ Column order does not matter, but all three must be present.
2. ROW COUNT AND IDENTIFIERS
-------------------------------
- Your file must contain **exactly 5000 rows**.
- Each row corresponds to one unique `subset_id`/`type` pair, with ids in the range **0–999** (inclusive).
- Every subset_id must appear **exactly once** for each type.
- Do **not** add, remove, or rename any IDs.
- Do **not** include duplicates or missing entries.
- The evaluator checks:
subset_id.min() == 0
subset_id.max() == 999
subset_id.unique().size == 1000
3. MEMBERSHIP SCORES
----------------------
- The `membership` column must contain **numeric values** representing your model’s predicted confidence
that the corresponding subset is a **member** of the training set.
Examples of valid membership values:
- Probabilities: values in [0.0, 1.0]
- Raw model scores: any finite numeric values (will be ranked for TPR@FPR=0.05)
- Do **not** submit string labels like "yes"/"no" or "member"/"non-member".
- The evaluator converts your `membership` column to numeric using `pd.to_numeric()`.
→ Any non-numeric, NaN, or infinite entries will cause automatic rejection.
4. TECHNICAL LIMITS
----------------------
- Maximum file size: **20 MB**
- Encoding: UTF-8 recommended.
- Avoid extra columns, blank lines, or formulas.
- Ensure all values are numeric and finite.
- Supported data types: int, float (e.g., float32, float64)
5. VALIDATION SUMMARY
------------------------
Your submission will fail if:
- Columns don’t match exactly ("type", "subset_id", "membership")
- Row count differs from 5000
- Any type name is unexpected or not in the allowed set
- Any subset_id is missing, duplicated, or outside [0, 999] for any type
- Any membership value is NaN, Inf, or non-numeric
- File is too large or not a valid CSV
Two key metrics are computed:
1. **ROC-AUC (Area Under the ROC Curve)** — measures overall discriminative ability.
2. **TPR@FPR=0.05** — true positive rate when the false positive rate is at 5%.
"""
# Configuration for the submission upload.
# NOTE(review): BASE_URL is plain HTTP, so the API key header sent with the
# upload request is not encrypted in transit.
BASE_URL = "http://35.192.205.84:80"
API_KEY = "YOUR_API_KEY_HERE" # replace with your actual API key
# Task identifier; it is appended to the submit endpoint path.
TASK_ID = "14-llm-dataset-inference"
FILE_PATH = "Your-Submission-File.csv" # replace with your actual file path
# Safety switch: nothing is uploaded while this is False.
SUBMIT = False # Set to True to enable submission
def die(msg):
    """Report *msg* on standard error and terminate with exit status 1."""
    text = f"{msg}"
    print(text, file=sys.stderr)
    raise SystemExit(1)
# Upload FILE_PATH to the evaluation server. Runs only when SUBMIT is True.
# Exits with status 1 if the file is missing, the upload is rejected, or the
# request fails.
if SUBMIT:
    if not os.path.isfile(FILE_PATH):
        die(f"File not found: {FILE_PATH}")

    try:
        # The handle must stay open until requests has read it for the upload.
        with open(FILE_PATH, "rb") as f:
            files = {
                # (fieldname) -> (filename, fileobj, content_type)
                # NOTE(review): "csv" is not a registered MIME type ("text/csv"
                # is); left unchanged because the server's expectation is not
                # visible from this script.
                "file": (os.path.basename(FILE_PATH), f, "csv"),
            }
            resp = requests.post(
                f"{BASE_URL}/submit/{TASK_ID}",
                headers={"X-API-Key": API_KEY},
                files=files,
                timeout=(10, 120),  # (connect timeout, read timeout)
            )

        # Helpful output even on non-2xx
        try:
            body = resp.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses; keep the raw text.
            body = {"raw_text": resp.text}

        if resp.status_code == 413:
            die("Upload rejected: file too large (HTTP 413). Reduce size and try again.")
        resp.raise_for_status()

        # A valid JSON response need not be an object (it could be a list or a
        # string); only look up the id when it actually is one.
        submission_id = body.get("submission_id") if isinstance(body, dict) else None
        print("Successfully submitted.")
        print("Server response:", body)
        if submission_id:
            print(f"Submission ID: {submission_id}")
    except requests.exceptions.RequestException as e:
        # Not every RequestException carries a response (e.g. connection errors).
        detail = getattr(e, "response", None)
        print(f"Submission error: {e}")
        if detail is not None:
            try:
                print("Server response:", detail.json())
            except ValueError:
                print("Server response (text):", detail.text)
        sys.exit(1)
|